{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the patients table (subject_id, gender, anchor_age, anchor_year, anchor_year_group, dod).\n",
    "# NOTE: the variable is called `admission`, but the file read here is patients.csv.gz.\n",
    "admission = pd.read_csv(filepath_or_buffer=\"hosp/patients.csv.gz\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>gender</th>\n",
       "      <th>anchor_age</th>\n",
       "      <th>anchor_year</th>\n",
       "      <th>anchor_year_group</th>\n",
       "      <th>dod</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10000032</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>2180</td>\n",
       "      <td>2014 - 2016</td>\n",
       "      <td>2180-09-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10000048</td>\n",
       "      <td>F</td>\n",
       "      <td>23</td>\n",
       "      <td>2126</td>\n",
       "      <td>2008 - 2010</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10000068</td>\n",
       "      <td>F</td>\n",
       "      <td>19</td>\n",
       "      <td>2160</td>\n",
       "      <td>2008 - 2010</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10000084</td>\n",
       "      <td>M</td>\n",
       "      <td>72</td>\n",
       "      <td>2160</td>\n",
       "      <td>2017 - 2019</td>\n",
       "      <td>2161-02-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10000102</td>\n",
       "      <td>F</td>\n",
       "      <td>27</td>\n",
       "      <td>2136</td>\n",
       "      <td>2008 - 2010</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id gender  anchor_age  anchor_year anchor_year_group         dod\n",
       "0    10000032      F          52         2180       2014 - 2016  2180-09-09\n",
       "1    10000048      F          23         2126       2008 - 2010         NaN\n",
       "2    10000068      F          19         2160       2008 - 2010         NaN\n",
       "3    10000084      M          72         2160       2017 - 2019  2161-02-13\n",
       "4    10000102      F          27         2136       2008 - 2010         NaN"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five patient rows.\n",
    "admission.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of distinct subject_ids that appear on more than one row\n",
    "# (0 means subject_id is a unique key of the patients table).\n",
    "# Series.sum() counts the True flags vectorised instead of iterating the Series with built-in sum();\n",
    "# int() keeps the displayed value a plain Python integer.\n",
    "int(admission[\"subject_id\"].value_counts().gt(1).sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>first_careunit</th>\n",
       "      <th>last_careunit</th>\n",
       "      <th>intime</th>\n",
       "      <th>outtime</th>\n",
       "      <th>los</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10000980</td>\n",
       "      <td>26913865</td>\n",
       "      <td>39765666</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2189-06-27 08:42:00</td>\n",
       "      <td>2189-06-27 20:38:27</td>\n",
       "      <td>0.497535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>2157-11-20 19:18:02</td>\n",
       "      <td>2157-11-21 22:08:00</td>\n",
       "      <td>1.118032</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10001217</td>\n",
       "      <td>27703517</td>\n",
       "      <td>34592300</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>2157-12-19 15:42:24</td>\n",
       "      <td>2157-12-20 14:27:41</td>\n",
       "      <td>0.948113</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10001725</td>\n",
       "      <td>25563031</td>\n",
       "      <td>31205490</td>\n",
       "      <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
       "      <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
       "      <td>2110-04-11 15:52:22</td>\n",
       "      <td>2110-04-12 23:59:56</td>\n",
       "      <td>1.338588</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id   stay_id  \\\n",
       "0    10000032  29079034  39553978   \n",
       "1    10000980  26913865  39765666   \n",
       "2    10001217  24597018  37067082   \n",
       "3    10001217  27703517  34592300   \n",
       "4    10001725  25563031  31205490   \n",
       "\n",
       "                                     first_careunit  \\\n",
       "0                Medical Intensive Care Unit (MICU)   \n",
       "1                Medical Intensive Care Unit (MICU)   \n",
       "2               Surgical Intensive Care Unit (SICU)   \n",
       "3               Surgical Intensive Care Unit (SICU)   \n",
       "4  Medical/Surgical Intensive Care Unit (MICU/SICU)   \n",
       "\n",
       "                                      last_careunit               intime  \\\n",
       "0                Medical Intensive Care Unit (MICU)  2180-07-23 14:00:00   \n",
       "1                Medical Intensive Care Unit (MICU)  2189-06-27 08:42:00   \n",
       "2               Surgical Intensive Care Unit (SICU)  2157-11-20 19:18:02   \n",
       "3               Surgical Intensive Care Unit (SICU)  2157-12-19 15:42:24   \n",
       "4  Medical/Surgical Intensive Care Unit (MICU/SICU)  2110-04-11 15:52:22   \n",
       "\n",
       "               outtime       los  \n",
       "0  2180-07-23 23:50:47  0.410266  \n",
       "1  2189-06-27 20:38:27  0.497535  \n",
       "2  2157-11-21 22:08:00  1.118032  \n",
       "3  2157-12-20 14:27:41  0.948113  \n",
       "4  2110-04-12 23:59:56  1.338588  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the ICU stays table and preview its first five rows.\n",
    "icu_stay = pd.read_csv(filepath_or_buffer=\"icu/icustays.csv.gz\")\n",
    "icu_stay.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('O')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Only the last expression of a cell is displayed, so inspect just the column of interest.\n",
    "# `intime` comes back as object dtype (plain strings), so it has to be parsed with\n",
    "# pd.to_datetime before any date arithmetic.\n",
    "icu_stay[\"intime\"].dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>first_careunit</th>\n",
       "      <th>last_careunit</th>\n",
       "      <th>intime</th>\n",
       "      <th>outtime</th>\n",
       "      <th>los</th>\n",
       "      <th>duration</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10000980</td>\n",
       "      <td>26913865</td>\n",
       "      <td>39765666</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2189-06-27 08:42:00</td>\n",
       "      <td>2189-06-27 20:38:27</td>\n",
       "      <td>0.497535</td>\n",
       "      <td>0 days 11:56:27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>2157-11-20 19:18:02</td>\n",
       "      <td>2157-11-21 22:08:00</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>1 days 02:49:58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10001217</td>\n",
       "      <td>27703517</td>\n",
       "      <td>34592300</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>2157-12-19 15:42:24</td>\n",
       "      <td>2157-12-20 14:27:41</td>\n",
       "      <td>0.948113</td>\n",
       "      <td>0 days 22:45:17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10001725</td>\n",
       "      <td>25563031</td>\n",
       "      <td>31205490</td>\n",
       "      <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
       "      <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
       "      <td>2110-04-11 15:52:22</td>\n",
       "      <td>2110-04-12 23:59:56</td>\n",
       "      <td>1.338588</td>\n",
       "      <td>1 days 08:07:34</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id   stay_id  \\\n",
       "0    10000032  29079034  39553978   \n",
       "1    10000980  26913865  39765666   \n",
       "2    10001217  24597018  37067082   \n",
       "3    10001217  27703517  34592300   \n",
       "4    10001725  25563031  31205490   \n",
       "\n",
       "                                     first_careunit  \\\n",
       "0                Medical Intensive Care Unit (MICU)   \n",
       "1                Medical Intensive Care Unit (MICU)   \n",
       "2               Surgical Intensive Care Unit (SICU)   \n",
       "3               Surgical Intensive Care Unit (SICU)   \n",
       "4  Medical/Surgical Intensive Care Unit (MICU/SICU)   \n",
       "\n",
       "                                      last_careunit               intime  \\\n",
       "0                Medical Intensive Care Unit (MICU)  2180-07-23 14:00:00   \n",
       "1                Medical Intensive Care Unit (MICU)  2189-06-27 08:42:00   \n",
       "2               Surgical Intensive Care Unit (SICU)  2157-11-20 19:18:02   \n",
       "3               Surgical Intensive Care Unit (SICU)  2157-12-19 15:42:24   \n",
       "4  Medical/Surgical Intensive Care Unit (MICU/SICU)  2110-04-11 15:52:22   \n",
       "\n",
       "               outtime       los        duration  \n",
       "0  2180-07-23 23:50:47  0.410266 0 days 09:50:47  \n",
       "1  2189-06-27 20:38:27  0.497535 0 days 11:56:27  \n",
       "2  2157-11-21 22:08:00  1.118032 1 days 02:49:58  \n",
       "3  2157-12-20 14:27:41  0.948113 0 days 22:45:17  \n",
       "4  2110-04-12 23:59:56  1.338588 1 days 08:07:34  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stay duration as a timedelta: parsed outtime minus parsed intime.\n",
    "icu_stay[\"duration\"] = pd.to_datetime(icu_stay[\"outtime\"]).sub(pd.to_datetime(icu_stay[\"intime\"]))\n",
    "icu_stay.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>first_careunit</th>\n",
       "      <th>last_careunit</th>\n",
       "      <th>intime</th>\n",
       "      <th>outtime</th>\n",
       "      <th>los</th>\n",
       "      <th>duration</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>52340</th>\n",
       "      <td>17153664</td>\n",
       "      <td>20885832</td>\n",
       "      <td>39667768</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2135-09-25 02:29:00</td>\n",
       "      <td>2136-01-13 08:03:29</td>\n",
       "      <td>110.232280</td>\n",
       "      <td>110 days 05:34:29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47919</th>\n",
       "      <td>16534814</td>\n",
       "      <td>25466361</td>\n",
       "      <td>32380519</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>2144-02-14 15:49:37</td>\n",
       "      <td>2144-05-28 03:48:11</td>\n",
       "      <td>103.499005</td>\n",
       "      <td>103 days 11:58:34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32362</th>\n",
       "      <td>14411859</td>\n",
       "      <td>24976204</td>\n",
       "      <td>38018615</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>2162-12-28 22:59:00</td>\n",
       "      <td>2163-04-09 16:24:47</td>\n",
       "      <td>101.726238</td>\n",
       "      <td>101 days 17:25:47</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4999</th>\n",
       "      <td>10699336</td>\n",
       "      <td>25777608</td>\n",
       "      <td>31879957</td>\n",
       "      <td>Trauma SICU (TSICU)</td>\n",
       "      <td>Trauma SICU (TSICU)</td>\n",
       "      <td>2158-01-07 19:25:07</td>\n",
       "      <td>2158-04-17 10:44:29</td>\n",
       "      <td>99.638449</td>\n",
       "      <td>99 days 15:19:22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37632</th>\n",
       "      <td>15149227</td>\n",
       "      <td>27965624</td>\n",
       "      <td>30820506</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>2143-11-27 21:54:37</td>\n",
       "      <td>2144-03-02 18:01:39</td>\n",
       "      <td>95.838218</td>\n",
       "      <td>95 days 20:07:02</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       subject_id   hadm_id   stay_id  \\\n",
       "52340    17153664  20885832  39667768   \n",
       "47919    16534814  25466361  32380519   \n",
       "32362    14411859  24976204  38018615   \n",
       "4999     10699336  25777608  31879957   \n",
       "37632    15149227  27965624  30820506   \n",
       "\n",
       "                                     first_careunit  \\\n",
       "52340            Medical Intensive Care Unit (MICU)   \n",
       "47919  Cardiac Vascular Intensive Care Unit (CVICU)   \n",
       "32362           Surgical Intensive Care Unit (SICU)   \n",
       "4999                            Trauma SICU (TSICU)   \n",
       "37632           Surgical Intensive Care Unit (SICU)   \n",
       "\n",
       "                                      last_careunit               intime  \\\n",
       "52340            Medical Intensive Care Unit (MICU)  2135-09-25 02:29:00   \n",
       "47919  Cardiac Vascular Intensive Care Unit (CVICU)  2144-02-14 15:49:37   \n",
       "32362           Surgical Intensive Care Unit (SICU)  2162-12-28 22:59:00   \n",
       "4999                            Trauma SICU (TSICU)  2158-01-07 19:25:07   \n",
       "37632           Surgical Intensive Care Unit (SICU)  2143-11-27 21:54:37   \n",
       "\n",
       "                   outtime         los          duration  \n",
       "52340  2136-01-13 08:03:29  110.232280 110 days 05:34:29  \n",
       "47919  2144-05-28 03:48:11  103.499005 103 days 11:58:34  \n",
       "32362  2163-04-09 16:24:47  101.726238 101 days 17:25:47  \n",
       "4999   2158-04-17 10:44:29   99.638449  99 days 15:19:22  \n",
       "37632  2144-03-02 18:01:39   95.838218  95 days 20:07:02  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Five longest ICU stays, longest first.\n",
    "icu_stay.sort_values(\"duration\", ascending=False).head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>first_careunit</th>\n",
       "      <th>last_careunit</th>\n",
       "      <th>intime</th>\n",
       "      <th>outtime</th>\n",
       "      <th>los</th>\n",
       "      <th>duration</th>\n",
       "      <th>gender</th>\n",
       "      <th>anchor_age</th>\n",
       "      <th>anchor_year</th>\n",
       "      <th>anchor_year_group</th>\n",
       "      <th>dod</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>2180</td>\n",
       "      <td>2014 - 2016</td>\n",
       "      <td>2180-09-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10000980</td>\n",
       "      <td>26913865</td>\n",
       "      <td>39765666</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2189-06-27 08:42:00</td>\n",
       "      <td>2189-06-27 20:38:27</td>\n",
       "      <td>0.497535</td>\n",
       "      <td>0 days 11:56:27</td>\n",
       "      <td>F</td>\n",
       "      <td>73</td>\n",
       "      <td>2186</td>\n",
       "      <td>2008 - 2010</td>\n",
       "      <td>2193-08-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>2157-11-20 19:18:02</td>\n",
       "      <td>2157-11-21 22:08:00</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>1 days 02:49:58</td>\n",
       "      <td>F</td>\n",
       "      <td>55</td>\n",
       "      <td>2157</td>\n",
       "      <td>2011 - 2013</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10001217</td>\n",
       "      <td>27703517</td>\n",
       "      <td>34592300</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>2157-12-19 15:42:24</td>\n",
       "      <td>2157-12-20 14:27:41</td>\n",
       "      <td>0.948113</td>\n",
       "      <td>0 days 22:45:17</td>\n",
       "      <td>F</td>\n",
       "      <td>55</td>\n",
       "      <td>2157</td>\n",
       "      <td>2011 - 2013</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10001725</td>\n",
       "      <td>25563031</td>\n",
       "      <td>31205490</td>\n",
       "      <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
       "      <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
       "      <td>2110-04-11 15:52:22</td>\n",
       "      <td>2110-04-12 23:59:56</td>\n",
       "      <td>1.338588</td>\n",
       "      <td>1 days 08:07:34</td>\n",
       "      <td>F</td>\n",
       "      <td>46</td>\n",
       "      <td>2110</td>\n",
       "      <td>2011 - 2013</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id   stay_id  \\\n",
       "0    10000032  29079034  39553978   \n",
       "1    10000980  26913865  39765666   \n",
       "2    10001217  24597018  37067082   \n",
       "3    10001217  27703517  34592300   \n",
       "4    10001725  25563031  31205490   \n",
       "\n",
       "                                     first_careunit  \\\n",
       "0                Medical Intensive Care Unit (MICU)   \n",
       "1                Medical Intensive Care Unit (MICU)   \n",
       "2               Surgical Intensive Care Unit (SICU)   \n",
       "3               Surgical Intensive Care Unit (SICU)   \n",
       "4  Medical/Surgical Intensive Care Unit (MICU/SICU)   \n",
       "\n",
       "                                      last_careunit               intime  \\\n",
       "0                Medical Intensive Care Unit (MICU)  2180-07-23 14:00:00   \n",
       "1                Medical Intensive Care Unit (MICU)  2189-06-27 08:42:00   \n",
       "2               Surgical Intensive Care Unit (SICU)  2157-11-20 19:18:02   \n",
       "3               Surgical Intensive Care Unit (SICU)  2157-12-19 15:42:24   \n",
       "4  Medical/Surgical Intensive Care Unit (MICU/SICU)  2110-04-11 15:52:22   \n",
       "\n",
       "               outtime       los        duration gender  anchor_age  \\\n",
       "0  2180-07-23 23:50:47  0.410266 0 days 09:50:47      F          52   \n",
       "1  2189-06-27 20:38:27  0.497535 0 days 11:56:27      F          73   \n",
       "2  2157-11-21 22:08:00  1.118032 1 days 02:49:58      F          55   \n",
       "3  2157-12-20 14:27:41  0.948113 0 days 22:45:17      F          55   \n",
       "4  2110-04-12 23:59:56  1.338588 1 days 08:07:34      F          46   \n",
       "\n",
       "   anchor_year anchor_year_group         dod  \n",
       "0         2180       2014 - 2016  2180-09-09  \n",
       "1         2186       2008 - 2010  2193-08-26  \n",
       "2         2157       2011 - 2013         NaN  \n",
       "3         2157       2011 - 2013         NaN  \n",
       "4         2110       2011 - 2013         NaN  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Attach patient demographics to every ICU stay (inner join on subject_id).\n",
    "# validate=\"many_to_one\" makes pandas raise MergeError if subject_id is not unique in `admission`,\n",
    "# so a duplicated patient row can never silently multiply ICU stays.\n",
    "icu_stay_demographic = icu_stay.merge(admission, on=\"subject_id\", how=\"inner\", validate=\"many_to_one\")\n",
    "icu_stay_demographic.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>first_careunit</th>\n",
       "      <th>last_careunit</th>\n",
       "      <th>intime</th>\n",
       "      <th>outtime</th>\n",
       "      <th>los</th>\n",
       "      <th>duration</th>\n",
       "      <th>gender</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10000980</td>\n",
       "      <td>26913865</td>\n",
       "      <td>39765666</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2189-06-27 08:42:00</td>\n",
       "      <td>2189-06-27 20:38:27</td>\n",
       "      <td>0.497535</td>\n",
       "      <td>0 days 11:56:27</td>\n",
       "      <td>F</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>2157-11-20 19:18:02</td>\n",
       "      <td>2157-11-21 22:08:00</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>1 days 02:49:58</td>\n",
       "      <td>F</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10001217</td>\n",
       "      <td>27703517</td>\n",
       "      <td>34592300</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>2157-12-19 15:42:24</td>\n",
       "      <td>2157-12-20 14:27:41</td>\n",
       "      <td>0.948113</td>\n",
       "      <td>0 days 22:45:17</td>\n",
       "      <td>F</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10001725</td>\n",
       "      <td>25563031</td>\n",
       "      <td>31205490</td>\n",
       "      <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
       "      <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
       "      <td>2110-04-11 15:52:22</td>\n",
       "      <td>2110-04-12 23:59:56</td>\n",
       "      <td>1.338588</td>\n",
       "      <td>1 days 08:07:34</td>\n",
       "      <td>F</td>\n",
       "      <td>46</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id   stay_id  \\\n",
       "0    10000032  29079034  39553978   \n",
       "1    10000980  26913865  39765666   \n",
       "2    10001217  24597018  37067082   \n",
       "3    10001217  27703517  34592300   \n",
       "4    10001725  25563031  31205490   \n",
       "\n",
       "                                     first_careunit  \\\n",
       "0                Medical Intensive Care Unit (MICU)   \n",
       "1                Medical Intensive Care Unit (MICU)   \n",
       "2               Surgical Intensive Care Unit (SICU)   \n",
       "3               Surgical Intensive Care Unit (SICU)   \n",
       "4  Medical/Surgical Intensive Care Unit (MICU/SICU)   \n",
       "\n",
       "                                      last_careunit              intime  \\\n",
       "0                Medical Intensive Care Unit (MICU) 2180-07-23 14:00:00   \n",
       "1                Medical Intensive Care Unit (MICU) 2189-06-27 08:42:00   \n",
       "2               Surgical Intensive Care Unit (SICU) 2157-11-20 19:18:02   \n",
       "3               Surgical Intensive Care Unit (SICU) 2157-12-19 15:42:24   \n",
       "4  Medical/Surgical Intensive Care Unit (MICU/SICU) 2110-04-11 15:52:22   \n",
       "\n",
       "               outtime       los        duration gender  age  \n",
       "0  2180-07-23 23:50:47  0.410266 0 days 09:50:47      F   52  \n",
       "1  2189-06-27 20:38:27  0.497535 0 days 11:56:27      F   76  \n",
       "2  2157-11-21 22:08:00  1.118032 1 days 02:49:58      F   55  \n",
       "3  2157-12-20 14:27:41  0.948113 0 days 22:45:17      F   55  \n",
       "4  2110-04-12 23:59:56  1.338588 1 days 08:07:34      F   46  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Derive each patient's age at ICU admission and drop the anchor columns.\n",
    "# The `age` check makes the cell safe to re-run: once the anchor columns have\n",
    "# been dropped, reading them again would raise a KeyError.\n",
    "if \"age\" not in icu_stay_demographic.columns:\n",
    "    icu_stay_demographic = (\n",
    "        icu_stay_demographic\n",
    "        .assign(\n",
    "            # Parse the ICU admission timestamp as datetime\n",
    "            intime=lambda df: pd.to_datetime(df[\"intime\"]),\n",
    "            # age = admission year - anchor_year + anchor_age (both coerced to numbers)\n",
    "            age=lambda df: (\n",
    "                df[\"intime\"].dt.year\n",
    "                - pd.to_numeric(df[\"anchor_year\"])\n",
    "                + pd.to_numeric(df[\"anchor_age\"])\n",
    "            ),\n",
    "        )\n",
    "        # Drop the columns that are no longer needed once age is known\n",
    "        .drop(columns=[\"anchor_year\", \"anchor_age\", \"anchor_year_group\", \"dod\"])\n",
    "    )\n",
    "\n",
    "# Display the DataFrame\n",
    "icu_stay_demographic.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the gzipped chart-events table\n",
    "file_path = 'icu/chartevents.csv.gz'\n",
    "\n",
    "# Number of rows the CSV reader parses per chunk\n",
    "chunk_size = 1000\n",
    "\n",
    "# Read the file chunk by chunk and collect every chunk in a list\n",
    "# (all chunks are kept, so the whole table still ends up in memory)\n",
    "chunks = list(pd.read_csv(file_path, chunksize=chunk_size))\n",
    "\n",
    "# Stack the chunks into one DataFrame with a fresh 0..n-1 index\n",
    "ce = pd.concat(chunks, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>caregiver_id</th>\n",
       "      <th>charttime</th>\n",
       "      <th>storetime</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>valuenum</th>\n",
       "      <th>valueuom</th>\n",
       "      <th>warning</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>47007.0</td>\n",
       "      <td>2180-07-23 21:01:00</td>\n",
       "      <td>2180-07-23 22:15:00</td>\n",
       "      <td>220179</td>\n",
       "      <td>82</td>\n",
       "      <td>82.0</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>47007.0</td>\n",
       "      <td>2180-07-23 21:01:00</td>\n",
       "      <td>2180-07-23 22:15:00</td>\n",
       "      <td>220180</td>\n",
       "      <td>59</td>\n",
       "      <td>59.0</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>47007.0</td>\n",
       "      <td>2180-07-23 21:01:00</td>\n",
       "      <td>2180-07-23 22:15:00</td>\n",
       "      <td>220181</td>\n",
       "      <td>63</td>\n",
       "      <td>63.0</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>47007.0</td>\n",
       "      <td>2180-07-23 22:00:00</td>\n",
       "      <td>2180-07-23 22:15:00</td>\n",
       "      <td>220045</td>\n",
       "      <td>94</td>\n",
       "      <td>94.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>47007.0</td>\n",
       "      <td>2180-07-23 22:00:00</td>\n",
       "      <td>2180-07-23 22:15:00</td>\n",
       "      <td>220179</td>\n",
       "      <td>85</td>\n",
       "      <td>85.0</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id   stay_id  caregiver_id            charttime  \\\n",
       "0    10000032  29079034  39553978       47007.0  2180-07-23 21:01:00   \n",
       "1    10000032  29079034  39553978       47007.0  2180-07-23 21:01:00   \n",
       "2    10000032  29079034  39553978       47007.0  2180-07-23 21:01:00   \n",
       "3    10000032  29079034  39553978       47007.0  2180-07-23 22:00:00   \n",
       "4    10000032  29079034  39553978       47007.0  2180-07-23 22:00:00   \n",
       "\n",
       "             storetime  itemid value  valuenum valueuom  warning  \n",
       "0  2180-07-23 22:15:00  220179    82      82.0     mmHg      0.0  \n",
       "1  2180-07-23 22:15:00  220180    59      59.0     mmHg      0.0  \n",
       "2  2180-07-23 22:15:00  220181    63      63.0     mmHg      0.0  \n",
       "3  2180-07-23 22:15:00  220045    94      94.0      bpm      0.0  \n",
       "4  2180-07-23 22:15:00  220179    85      85.0     mmHg      0.0  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the chart-events table\n",
    "ce.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>itemid</th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>param_type</th>\n",
       "      <th>lownormalvalue</th>\n",
       "      <th>highnormalvalue</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>220001</td>\n",
       "      <td>Problem List</td>\n",
       "      <td>Problem List</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>General</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Text</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>220003</td>\n",
       "      <td>ICU Admission date</td>\n",
       "      <td>ICU Admission date</td>\n",
       "      <td>datetimeevents</td>\n",
       "      <td>ADT</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Date and time</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>220045</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>HR</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>bpm</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>220046</td>\n",
       "      <td>Heart rate Alarm - High</td>\n",
       "      <td>HR Alarm - High</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>bpm</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>220047</td>\n",
       "      <td>Heart Rate Alarm - Low</td>\n",
       "      <td>HR Alarm - Low</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>bpm</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   itemid                    label        abbreviation         linksto  \\\n",
       "0  220001             Problem List        Problem List     chartevents   \n",
       "1  220003       ICU Admission date  ICU Admission date  datetimeevents   \n",
       "2  220045               Heart Rate                  HR     chartevents   \n",
       "3  220046  Heart rate Alarm - High     HR Alarm - High     chartevents   \n",
       "4  220047   Heart Rate Alarm - Low      HR Alarm - Low     chartevents   \n",
       "\n",
       "              category unitname     param_type  lownormalvalue  \\\n",
       "0              General      NaN           Text             NaN   \n",
       "1                  ADT      NaN  Date and time             NaN   \n",
       "2  Routine Vital Signs      bpm        Numeric             NaN   \n",
       "3               Alarms      bpm        Numeric             NaN   \n",
       "4               Alarms      bpm        Numeric             NaN   \n",
       "\n",
       "   highnormalvalue  \n",
       "0              NaN  \n",
       "1              NaN  \n",
       "2              NaN  \n",
       "3              NaN  \n",
       "4              NaN  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the item dictionary (itemid, label, unit, category, ...) used to\n",
    "# interpret the itemid column of the chart events\n",
    "item_id = pd.read_csv(\"icu/d_items.csv.gz\")\n",
    "\n",
    "# Preview the first five dictionary entries\n",
    "item_id.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>itemid</th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>param_type</th>\n",
       "      <th>lownormalvalue</th>\n",
       "      <th>highnormalvalue</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>220045</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>HR</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>bpm</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   itemid       label abbreviation      linksto             category unitname  \\\n",
       "2  220045  Heart Rate           HR  chartevents  Routine Vital Signs      bpm   \n",
       "\n",
       "  param_type  lownormalvalue  highnormalvalue  \n",
       "2    Numeric             NaN              NaN  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look up the itemid whose label is exactly \"Heart Rate\"\n",
    "is_heart_rate = item_id[\"label\"].eq(\"Heart Rate\")\n",
    "item_id.loc[is_heart_rate].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>itemid</th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>param_type</th>\n",
       "      <th>lownormalvalue</th>\n",
       "      <th>highnormalvalue</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>220050</td>\n",
       "      <td>Arterial Blood Pressure systolic</td>\n",
       "      <td>ABPs</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>90.0</td>\n",
       "      <td>140.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>220059</td>\n",
       "      <td>Pulmonary Artery Pressure systolic</td>\n",
       "      <td>PAPs</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>15.0</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>220179</td>\n",
       "      <td>Non Invasive Blood Pressure systolic</td>\n",
       "      <td>NBPs</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1995</th>\n",
       "      <td>226850</td>\n",
       "      <td>RV systolic pressure(PA Line)</td>\n",
       "      <td>RV systolic pressure(PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1997</th>\n",
       "      <td>226852</td>\n",
       "      <td>PA systolic pressure(PA Line)</td>\n",
       "      <td>PA systolic pressure(PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      itemid                                 label  \\\n",
       "6     220050      Arterial Blood Pressure systolic   \n",
       "11    220059    Pulmonary Artery Pressure systolic   \n",
       "24    220179  Non Invasive Blood Pressure systolic   \n",
       "1995  226850         RV systolic pressure(PA Line)   \n",
       "1997  226852         PA systolic pressure(PA Line)   \n",
       "\n",
       "                       abbreviation      linksto             category  \\\n",
       "6                              ABPs  chartevents  Routine Vital Signs   \n",
       "11                             PAPs  chartevents         Hemodynamics   \n",
       "24                             NBPs  chartevents  Routine Vital Signs   \n",
       "1995  RV systolic pressure(PA Line)  chartevents    PA Line Insertion   \n",
       "1997  PA systolic pressure(PA Line)  chartevents    PA Line Insertion   \n",
       "\n",
       "     unitname param_type  lownormalvalue  highnormalvalue  \n",
       "6        mmHg    Numeric            90.0            140.0  \n",
       "11       mmHg    Numeric            15.0             25.0  \n",
       "24       mmHg    Numeric             NaN              NaN  \n",
       "1995     mmHg    Numeric             NaN              NaN  \n",
       "1997     mmHg    Numeric             NaN              NaN  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Find itemids whose label contains \"systolic\".\n",
    "# The match is case-sensitive and only the first five hits are shown.\n",
    "systolic_mask = item_id[\"label\"].str.contains(\"systolic\")\n",
    "item_id.loc[systolic_mask].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>itemid</th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>param_type</th>\n",
       "      <th>lownormalvalue</th>\n",
       "      <th>highnormalvalue</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>220051</td>\n",
       "      <td>Arterial Blood Pressure diastolic</td>\n",
       "      <td>ABPd</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>60.0</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>220060</td>\n",
       "      <td>Pulmonary Artery Pressure diastolic</td>\n",
       "      <td>PAPd</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>8.0</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>220180</td>\n",
       "      <td>Non Invasive Blood Pressure diastolic</td>\n",
       "      <td>NBPd</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996</th>\n",
       "      <td>226851</td>\n",
       "      <td>RV diastolic pressure(PA Line)</td>\n",
       "      <td>RV diastolic pressure(PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1998</th>\n",
       "      <td>226853</td>\n",
       "      <td>PA diastolic pressure(PA Line)</td>\n",
       "      <td>PA diastolic pressure(PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>Numeric</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      itemid                                  label  \\\n",
       "7     220051      Arterial Blood Pressure diastolic   \n",
       "12    220060    Pulmonary Artery Pressure diastolic   \n",
       "25    220180  Non Invasive Blood Pressure diastolic   \n",
       "1996  226851         RV diastolic pressure(PA Line)   \n",
       "1998  226853         PA diastolic pressure(PA Line)   \n",
       "\n",
       "                        abbreviation      linksto             category  \\\n",
       "7                               ABPd  chartevents  Routine Vital Signs   \n",
       "12                              PAPd  chartevents         Hemodynamics   \n",
       "25                              NBPd  chartevents  Routine Vital Signs   \n",
       "1996  RV diastolic pressure(PA Line)  chartevents    PA Line Insertion   \n",
       "1998  PA diastolic pressure(PA Line)  chartevents    PA Line Insertion   \n",
       "\n",
       "     unitname param_type  lownormalvalue  highnormalvalue  \n",
       "7        mmHg    Numeric            60.0             90.0  \n",
       "12       mmHg    Numeric             8.0             15.0  \n",
       "25       mmHg    Numeric             NaN              NaN  \n",
       "1996     mmHg    Numeric             NaN              NaN  \n",
       "1998     mmHg    Numeric             NaN              NaN  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Find itemids whose label contains \"diastolic\".\n",
    "# The match is case-sensitive and only the first five hits are shown.\n",
    "diastolic_mask = item_id[\"label\"].str.contains(\"diastolic\")\n",
    "item_id.loc[diastolic_mask].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>caregiver_id</th>\n",
       "      <th>charttime</th>\n",
       "      <th>storetime</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>valuenum</th>\n",
       "      <th>valueuom</th>\n",
       "      <th>warning</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>47007.0</td>\n",
       "      <td>2180-07-23 22:00:00</td>\n",
       "      <td>2180-07-23 22:15:00</td>\n",
       "      <td>220045</td>\n",
       "      <td>94</td>\n",
       "      <td>94.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>66056.0</td>\n",
       "      <td>2180-07-23 19:00:00</td>\n",
       "      <td>2180-07-23 19:59:00</td>\n",
       "      <td>220045</td>\n",
       "      <td>97</td>\n",
       "      <td>97.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>66056.0</td>\n",
       "      <td>2180-07-23 20:00:00</td>\n",
       "      <td>2180-07-23 21:01:00</td>\n",
       "      <td>220045</td>\n",
       "      <td>100</td>\n",
       "      <td>100.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>66056.0</td>\n",
       "      <td>2180-07-23 21:00:00</td>\n",
       "      <td>2180-07-23 21:01:00</td>\n",
       "      <td>220045</td>\n",
       "      <td>94</td>\n",
       "      <td>94.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>166</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>88981.0</td>\n",
       "      <td>2180-07-23 14:12:00</td>\n",
       "      <td>2180-07-23 14:17:00</td>\n",
       "      <td>220045</td>\n",
       "      <td>91</td>\n",
       "      <td>91.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     subject_id   hadm_id   stay_id  caregiver_id            charttime  \\\n",
       "3      10000032  29079034  39553978       47007.0  2180-07-23 22:00:00   \n",
       "9      10000032  29079034  39553978       66056.0  2180-07-23 19:00:00   \n",
       "19     10000032  29079034  39553978       66056.0  2180-07-23 20:00:00   \n",
       "60     10000032  29079034  39553978       66056.0  2180-07-23 21:00:00   \n",
       "166    10000032  29079034  39553978       88981.0  2180-07-23 14:12:00   \n",
       "\n",
       "               storetime  itemid value  valuenum valueuom  warning  \n",
       "3    2180-07-23 22:15:00  220045    94      94.0      bpm      0.0  \n",
       "9    2180-07-23 19:59:00  220045    97      97.0      bpm      0.0  \n",
       "19   2180-07-23 21:01:00  220045   100     100.0      bpm      0.0  \n",
       "60   2180-07-23 21:01:00  220045    94      94.0      bpm      0.0  \n",
       "166  2180-07-23 14:17:00  220045    91      91.0      bpm      0.0  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the chart events for Heart Rate (220045) and Arterial Blood\n",
    "# Pressure systolic (220050) / diastolic (220051)\n",
    "vital_itemids = [220045, 220050, 220051]\n",
    "ce_hr_sys_dia = ce.loc[ce[\"itemid\"].isin(vital_itemids)]\n",
    "ce_hr_sys_dia.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>first_careunit</th>\n",
       "      <th>last_careunit</th>\n",
       "      <th>intime</th>\n",
       "      <th>outtime</th>\n",
       "      <th>los</th>\n",
       "      <th>duration</th>\n",
       "      <th>gender</th>\n",
       "      <th>age</th>\n",
       "      <th>caregiver_id</th>\n",
       "      <th>charttime</th>\n",
       "      <th>storetime</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>valuenum</th>\n",
       "      <th>valueuom</th>\n",
       "      <th>warning</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>47007.0</td>\n",
       "      <td>2180-07-23 22:00:00</td>\n",
       "      <td>2180-07-23 22:15:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>94</td>\n",
       "      <td>94.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>66056.0</td>\n",
       "      <td>2180-07-23 19:00:00</td>\n",
       "      <td>2180-07-23 19:59:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>97</td>\n",
       "      <td>97.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>66056.0</td>\n",
       "      <td>2180-07-23 20:00:00</td>\n",
       "      <td>2180-07-23 21:01:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>100</td>\n",
       "      <td>100.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>66056.0</td>\n",
       "      <td>2180-07-23 21:00:00</td>\n",
       "      <td>2180-07-23 21:01:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>94</td>\n",
       "      <td>94.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>88981.0</td>\n",
       "      <td>2180-07-23 14:12:00</td>\n",
       "      <td>2180-07-23 14:17:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>91</td>\n",
       "      <td>91.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id   stay_id                      first_careunit  \\\n",
       "0    10000032  29079034  39553978  Medical Intensive Care Unit (MICU)   \n",
       "1    10000032  29079034  39553978  Medical Intensive Care Unit (MICU)   \n",
       "2    10000032  29079034  39553978  Medical Intensive Care Unit (MICU)   \n",
       "3    10000032  29079034  39553978  Medical Intensive Care Unit (MICU)   \n",
       "4    10000032  29079034  39553978  Medical Intensive Care Unit (MICU)   \n",
       "\n",
       "                        last_careunit              intime  \\\n",
       "0  Medical Intensive Care Unit (MICU) 2180-07-23 14:00:00   \n",
       "1  Medical Intensive Care Unit (MICU) 2180-07-23 14:00:00   \n",
       "2  Medical Intensive Care Unit (MICU) 2180-07-23 14:00:00   \n",
       "3  Medical Intensive Care Unit (MICU) 2180-07-23 14:00:00   \n",
       "4  Medical Intensive Care Unit (MICU) 2180-07-23 14:00:00   \n",
       "\n",
       "               outtime       los        duration gender  age  caregiver_id  \\\n",
       "0  2180-07-23 23:50:47  0.410266 0 days 09:50:47      F   52       47007.0   \n",
       "1  2180-07-23 23:50:47  0.410266 0 days 09:50:47      F   52       66056.0   \n",
       "2  2180-07-23 23:50:47  0.410266 0 days 09:50:47      F   52       66056.0   \n",
       "3  2180-07-23 23:50:47  0.410266 0 days 09:50:47      F   52       66056.0   \n",
       "4  2180-07-23 23:50:47  0.410266 0 days 09:50:47      F   52       88981.0   \n",
       "\n",
       "             charttime            storetime    itemid value  valuenum  \\\n",
       "0  2180-07-23 22:00:00  2180-07-23 22:15:00  220045.0    94      94.0   \n",
       "1  2180-07-23 19:00:00  2180-07-23 19:59:00  220045.0    97      97.0   \n",
       "2  2180-07-23 20:00:00  2180-07-23 21:01:00  220045.0   100     100.0   \n",
       "3  2180-07-23 21:00:00  2180-07-23 21:01:00  220045.0    94      94.0   \n",
       "4  2180-07-23 14:12:00  2180-07-23 14:17:00  220045.0    91      91.0   \n",
       "\n",
       "  valueuom  warning  \n",
       "0      bpm      0.0  \n",
       "1      bpm      0.0  \n",
       "2      bpm      0.0  \n",
       "3      bpm      0.0  \n",
       "4      bpm      0.0  "
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# merge hr, systolic, and diastolic with icu patients table\n",
    "icu_demo_ce = pd.merge(icu_stay_demographic, ce_hr_sys_dia, on=[\"subject_id\", \"hadm_id\", \"stay_id\"], how=\"left\")\n",
    "icu_demo_ce.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the merged DataFrame as a CSV file\n",
    "icu_demo_ce.to_csv('icu_demo_ce.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "icu_demo_ce = pd.read_csv('icu_demo_ce.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>first_careunit</th>\n",
       "      <th>last_careunit</th>\n",
       "      <th>intime</th>\n",
       "      <th>outtime</th>\n",
       "      <th>los</th>\n",
       "      <th>duration</th>\n",
       "      <th>gender</th>\n",
       "      <th>age</th>\n",
       "      <th>caregiver_id</th>\n",
       "      <th>charttime</th>\n",
       "      <th>storetime</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>valuenum</th>\n",
       "      <th>valueuom</th>\n",
       "      <th>warning</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>47007.0</td>\n",
       "      <td>2180-07-23 22:00:00</td>\n",
       "      <td>2180-07-23 22:15:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>66056.0</td>\n",
       "      <td>2180-07-23 19:00:00</td>\n",
       "      <td>2180-07-23 19:59:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>97.0</td>\n",
       "      <td>97.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>66056.0</td>\n",
       "      <td>2180-07-23 20:00:00</td>\n",
       "      <td>2180-07-23 21:01:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>66056.0</td>\n",
       "      <td>2180-07-23 21:00:00</td>\n",
       "      <td>2180-07-23 21:01:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>88981.0</td>\n",
       "      <td>2180-07-23 14:12:00</td>\n",
       "      <td>2180-07-23 14:17:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id   stay_id                      first_careunit  \\\n",
       "0    10000032  29079034  39553978  Medical Intensive Care Unit (MICU)   \n",
       "1    10000032  29079034  39553978  Medical Intensive Care Unit (MICU)   \n",
       "2    10000032  29079034  39553978  Medical Intensive Care Unit (MICU)   \n",
       "3    10000032  29079034  39553978  Medical Intensive Care Unit (MICU)   \n",
       "4    10000032  29079034  39553978  Medical Intensive Care Unit (MICU)   \n",
       "\n",
       "                        last_careunit               intime  \\\n",
       "0  Medical Intensive Care Unit (MICU)  2180-07-23 14:00:00   \n",
       "1  Medical Intensive Care Unit (MICU)  2180-07-23 14:00:00   \n",
       "2  Medical Intensive Care Unit (MICU)  2180-07-23 14:00:00   \n",
       "3  Medical Intensive Care Unit (MICU)  2180-07-23 14:00:00   \n",
       "4  Medical Intensive Care Unit (MICU)  2180-07-23 14:00:00   \n",
       "\n",
       "               outtime       los         duration gender  age  caregiver_id  \\\n",
       "0  2180-07-23 23:50:47  0.410266  0 days 09:50:47      F   52       47007.0   \n",
       "1  2180-07-23 23:50:47  0.410266  0 days 09:50:47      F   52       66056.0   \n",
       "2  2180-07-23 23:50:47  0.410266  0 days 09:50:47      F   52       66056.0   \n",
       "3  2180-07-23 23:50:47  0.410266  0 days 09:50:47      F   52       66056.0   \n",
       "4  2180-07-23 23:50:47  0.410266  0 days 09:50:47      F   52       88981.0   \n",
       "\n",
       "             charttime            storetime    itemid  value  valuenum  \\\n",
       "0  2180-07-23 22:00:00  2180-07-23 22:15:00  220045.0   94.0      94.0   \n",
       "1  2180-07-23 19:00:00  2180-07-23 19:59:00  220045.0   97.0      97.0   \n",
       "2  2180-07-23 20:00:00  2180-07-23 21:01:00  220045.0  100.0     100.0   \n",
       "3  2180-07-23 21:00:00  2180-07-23 21:01:00  220045.0   94.0      94.0   \n",
       "4  2180-07-23 14:12:00  2180-07-23 14:17:00  220045.0   91.0      91.0   \n",
       "\n",
       "  valueuom  warning  \n",
       "0      bpm      0.0  \n",
       "1      bpm      0.0  \n",
       "2      bpm      0.0  \n",
       "3      bpm      0.0  \n",
       "4      bpm      0.0  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "icu_demo_ce.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-35-7bc19c7c96db>:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
      "  filtered_df = icu_demo_ce[icu_demo_ce.sort_values(by='charttime').groupby(['subject_id', 'hadm_id', 'stay_id'])['itemid'].cumcount() < 2]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>first_careunit</th>\n",
       "      <th>last_careunit</th>\n",
       "      <th>intime</th>\n",
       "      <th>outtime</th>\n",
       "      <th>los</th>\n",
       "      <th>duration</th>\n",
       "      <th>gender</th>\n",
       "      <th>age</th>\n",
       "      <th>caregiver_id</th>\n",
       "      <th>charttime</th>\n",
       "      <th>storetime</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>valuenum</th>\n",
       "      <th>valueuom</th>\n",
       "      <th>warning</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>88981.0</td>\n",
       "      <td>2180-07-23 14:12:00</td>\n",
       "      <td>2180-07-23 14:17:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2180-07-23 14:00:00</td>\n",
       "      <td>2180-07-23 23:50:47</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>0 days 09:50:47</td>\n",
       "      <td>F</td>\n",
       "      <td>52</td>\n",
       "      <td>88981.0</td>\n",
       "      <td>2180-07-23 14:30:00</td>\n",
       "      <td>2180-07-23 14:43:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>93.0</td>\n",
       "      <td>93.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10000980</td>\n",
       "      <td>26913865</td>\n",
       "      <td>39765666</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2189-06-27 08:42:00</td>\n",
       "      <td>2189-06-27 20:38:27</td>\n",
       "      <td>0.497535</td>\n",
       "      <td>0 days 11:56:27</td>\n",
       "      <td>F</td>\n",
       "      <td>76</td>\n",
       "      <td>36518.0</td>\n",
       "      <td>2189-06-27 08:56:00</td>\n",
       "      <td>2189-06-27 09:07:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>77.0</td>\n",
       "      <td>77.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>10000980</td>\n",
       "      <td>26913865</td>\n",
       "      <td>39765666</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2189-06-27 08:42:00</td>\n",
       "      <td>2189-06-27 20:38:27</td>\n",
       "      <td>0.497535</td>\n",
       "      <td>0 days 11:56:27</td>\n",
       "      <td>F</td>\n",
       "      <td>76</td>\n",
       "      <td>36518.0</td>\n",
       "      <td>2189-06-27 09:04:00</td>\n",
       "      <td>2189-06-27 09:07:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>Surgical Intensive Care Unit (SICU)</td>\n",
       "      <td>2157-11-20 19:18:02</td>\n",
       "      <td>2157-11-21 22:08:00</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>1 days 02:49:58</td>\n",
       "      <td>F</td>\n",
       "      <td>55</td>\n",
       "      <td>84976.0</td>\n",
       "      <td>2157-11-20 19:19:00</td>\n",
       "      <td>2157-11-20 19:30:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>86.0</td>\n",
       "      <td>86.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    subject_id   hadm_id   stay_id                       first_careunit  \\\n",
       "4     10000032  29079034  39553978   Medical Intensive Care Unit (MICU)   \n",
       "5     10000032  29079034  39553978   Medical Intensive Care Unit (MICU)   \n",
       "10    10000980  26913865  39765666   Medical Intensive Care Unit (MICU)   \n",
       "11    10000980  26913865  39765666   Medical Intensive Care Unit (MICU)   \n",
       "37    10001217  24597018  37067082  Surgical Intensive Care Unit (SICU)   \n",
       "\n",
       "                          last_careunit               intime  \\\n",
       "4    Medical Intensive Care Unit (MICU)  2180-07-23 14:00:00   \n",
       "5    Medical Intensive Care Unit (MICU)  2180-07-23 14:00:00   \n",
       "10   Medical Intensive Care Unit (MICU)  2189-06-27 08:42:00   \n",
       "11   Medical Intensive Care Unit (MICU)  2189-06-27 08:42:00   \n",
       "37  Surgical Intensive Care Unit (SICU)  2157-11-20 19:18:02   \n",
       "\n",
       "                outtime       los         duration gender  age  caregiver_id  \\\n",
       "4   2180-07-23 23:50:47  0.410266  0 days 09:50:47      F   52       88981.0   \n",
       "5   2180-07-23 23:50:47  0.410266  0 days 09:50:47      F   52       88981.0   \n",
       "10  2189-06-27 20:38:27  0.497535  0 days 11:56:27      F   76       36518.0   \n",
       "11  2189-06-27 20:38:27  0.497535  0 days 11:56:27      F   76       36518.0   \n",
       "37  2157-11-21 22:08:00  1.118032  1 days 02:49:58      F   55       84976.0   \n",
       "\n",
       "              charttime            storetime    itemid  value  valuenum  \\\n",
       "4   2180-07-23 14:12:00  2180-07-23 14:17:00  220045.0   91.0      91.0   \n",
       "5   2180-07-23 14:30:00  2180-07-23 14:43:00  220045.0   93.0      93.0   \n",
       "10  2189-06-27 08:56:00  2189-06-27 09:07:00  220045.0   77.0      77.0   \n",
       "11  2189-06-27 09:04:00  2189-06-27 09:07:00  220045.0   75.0      75.0   \n",
       "37  2157-11-20 19:19:00  2157-11-20 19:30:00  220045.0   86.0      86.0   \n",
       "\n",
       "   valueuom  warning  \n",
       "4       bpm      0.0  \n",
       "5       bpm      0.0  \n",
       "10      bpm      0.0  \n",
       "11      bpm      0.0  \n",
       "37      bpm      0.0  "
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_df = icu_demo_ce[icu_demo_ce.sort_values(by='charttime').groupby(['subject_id', 'hadm_id', 'stay_id'])['itemid'].cumcount() < 2]\n",
    "filtered_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>first_careunit</th>\n",
       "      <th>last_careunit</th>\n",
       "      <th>intime</th>\n",
       "      <th>outtime</th>\n",
       "      <th>los</th>\n",
       "      <th>duration</th>\n",
       "      <th>gender</th>\n",
       "      <th>age</th>\n",
       "      <th>caregiver_id</th>\n",
       "      <th>charttime</th>\n",
       "      <th>storetime</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>valuenum</th>\n",
       "      <th>valueuom</th>\n",
       "      <th>warning</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8916714</th>\n",
       "      <td>18106347</td>\n",
       "      <td>24305596</td>\n",
       "      <td>30588857</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>2110-01-11 10:16:06</td>\n",
       "      <td>2110-01-12 17:17:47</td>\n",
       "      <td>1.292836</td>\n",
       "      <td>1 days 07:01:41</td>\n",
       "      <td>F</td>\n",
       "      <td>48</td>\n",
       "      <td>28791.0</td>\n",
       "      <td>2110-01-11 12:42:00</td>\n",
       "      <td>2110-01-11 12:49:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8916715</th>\n",
       "      <td>18106347</td>\n",
       "      <td>24305596</td>\n",
       "      <td>30588857</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>2110-01-11 10:16:06</td>\n",
       "      <td>2110-01-12 17:17:47</td>\n",
       "      <td>1.292836</td>\n",
       "      <td>1 days 07:01:41</td>\n",
       "      <td>F</td>\n",
       "      <td>48</td>\n",
       "      <td>28791.0</td>\n",
       "      <td>2110-01-11 12:42:00</td>\n",
       "      <td>2110-01-11 12:49:00</td>\n",
       "      <td>220050.0</td>\n",
       "      <td>120.0</td>\n",
       "      <td>120.0</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8916716</th>\n",
       "      <td>18106347</td>\n",
       "      <td>24305596</td>\n",
       "      <td>30588857</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>2110-01-11 10:16:06</td>\n",
       "      <td>2110-01-12 17:17:47</td>\n",
       "      <td>1.292836</td>\n",
       "      <td>1 days 07:01:41</td>\n",
       "      <td>F</td>\n",
       "      <td>48</td>\n",
       "      <td>28791.0</td>\n",
       "      <td>2110-01-11 12:42:00</td>\n",
       "      <td>2110-01-11 12:49:00</td>\n",
       "      <td>220051.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8916717</th>\n",
       "      <td>18106347</td>\n",
       "      <td>24305596</td>\n",
       "      <td>30588857</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>2110-01-11 10:16:06</td>\n",
       "      <td>2110-01-12 17:17:47</td>\n",
       "      <td>1.292836</td>\n",
       "      <td>1 days 07:01:41</td>\n",
       "      <td>F</td>\n",
       "      <td>48</td>\n",
       "      <td>28791.0</td>\n",
       "      <td>2110-01-11 12:45:00</td>\n",
       "      <td>2110-01-11 13:28:00</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>93.0</td>\n",
       "      <td>93.0</td>\n",
       "      <td>bpm</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8916718</th>\n",
       "      <td>18106347</td>\n",
       "      <td>24305596</td>\n",
       "      <td>30588857</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>2110-01-11 10:16:06</td>\n",
       "      <td>2110-01-12 17:17:47</td>\n",
       "      <td>1.292836</td>\n",
       "      <td>1 days 07:01:41</td>\n",
       "      <td>F</td>\n",
       "      <td>48</td>\n",
       "      <td>28791.0</td>\n",
       "      <td>2110-01-11 12:45:00</td>\n",
       "      <td>2110-01-11 13:28:00</td>\n",
       "      <td>220050.0</td>\n",
       "      <td>108.0</td>\n",
       "      <td>108.0</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         subject_id   hadm_id   stay_id  \\\n",
       "8916714    18106347  24305596  30588857   \n",
       "8916715    18106347  24305596  30588857   \n",
       "8916716    18106347  24305596  30588857   \n",
       "8916717    18106347  24305596  30588857   \n",
       "8916718    18106347  24305596  30588857   \n",
       "\n",
       "                                       first_careunit  \\\n",
       "8916714  Cardiac Vascular Intensive Care Unit (CVICU)   \n",
       "8916715  Cardiac Vascular Intensive Care Unit (CVICU)   \n",
       "8916716  Cardiac Vascular Intensive Care Unit (CVICU)   \n",
       "8916717  Cardiac Vascular Intensive Care Unit (CVICU)   \n",
       "8916718  Cardiac Vascular Intensive Care Unit (CVICU)   \n",
       "\n",
       "                                        last_careunit               intime  \\\n",
       "8916714  Cardiac Vascular Intensive Care Unit (CVICU)  2110-01-11 10:16:06   \n",
       "8916715  Cardiac Vascular Intensive Care Unit (CVICU)  2110-01-11 10:16:06   \n",
       "8916716  Cardiac Vascular Intensive Care Unit (CVICU)  2110-01-11 10:16:06   \n",
       "8916717  Cardiac Vascular Intensive Care Unit (CVICU)  2110-01-11 10:16:06   \n",
       "8916718  Cardiac Vascular Intensive Care Unit (CVICU)  2110-01-11 10:16:06   \n",
       "\n",
       "                     outtime       los         duration gender  age  \\\n",
       "8916714  2110-01-12 17:17:47  1.292836  1 days 07:01:41      F   48   \n",
       "8916715  2110-01-12 17:17:47  1.292836  1 days 07:01:41      F   48   \n",
       "8916716  2110-01-12 17:17:47  1.292836  1 days 07:01:41      F   48   \n",
       "8916717  2110-01-12 17:17:47  1.292836  1 days 07:01:41      F   48   \n",
       "8916718  2110-01-12 17:17:47  1.292836  1 days 07:01:41      F   48   \n",
       "\n",
       "         caregiver_id            charttime            storetime    itemid  \\\n",
       "8916714       28791.0  2110-01-11 12:42:00  2110-01-11 12:49:00  220045.0   \n",
       "8916715       28791.0  2110-01-11 12:42:00  2110-01-11 12:49:00  220050.0   \n",
       "8916716       28791.0  2110-01-11 12:42:00  2110-01-11 12:49:00  220051.0   \n",
       "8916717       28791.0  2110-01-11 12:45:00  2110-01-11 13:28:00  220045.0   \n",
       "8916718       28791.0  2110-01-11 12:45:00  2110-01-11 13:28:00  220050.0   \n",
       "\n",
       "         value  valuenum valueuom  warning  \n",
       "8916714   94.0      94.0      bpm      0.0  \n",
       "8916715  120.0     120.0     mmHg      0.0  \n",
       "8916716   75.0      75.0     mmHg      0.0  \n",
       "8916717   93.0      93.0      bpm      0.0  \n",
       "8916718  108.0     108.0     mmHg      0.0  "
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sort observations based on time\n",
    "icu_demo_ce_sorted = icu_demo_ce.copy()\n",
    "\n",
    "icu_demo_ce_sorted = test_df.sort_values(by=\"charttime\")\n",
    "\n",
    "icu_demo_ce_sorted.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>los</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>92.0</td>\n",
       "      <td>52</td>\n",
       "      <td>F</td>\n",
       "      <td>0.410266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10000980</td>\n",
       "      <td>26913865</td>\n",
       "      <td>39765666</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>76</td>\n",
       "      <td>F</td>\n",
       "      <td>0.497535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>88.5</td>\n",
       "      <td>55</td>\n",
       "      <td>F</td>\n",
       "      <td>1.118032</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>10001217</td>\n",
       "      <td>27703517</td>\n",
       "      <td>34592300</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>84.5</td>\n",
       "      <td>55</td>\n",
       "      <td>F</td>\n",
       "      <td>0.948113</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>10001725</td>\n",
       "      <td>25563031</td>\n",
       "      <td>31205490</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>70.5</td>\n",
       "      <td>46</td>\n",
       "      <td>F</td>\n",
       "      <td>1.338588</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    subject_id   hadm_id   stay_id    itemid  value  age gender       los\n",
       "0     10000032  29079034  39553978  220045.0   92.0   52      F  0.410266\n",
       "10    10000980  26913865  39765666  220045.0   76.0   76      F  0.497535\n",
       "21    10001217  24597018  37067082  220045.0   88.5   55      F  1.118032\n",
       "48    10001217  27703517  34592300  220045.0   84.5   55      F  0.948113\n",
       "73    10001725  25563031  31205490  220045.0   70.5   46      F  1.338588"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First, create a new DataFrame with the average of the first two observations of 'value' for each group\n",
    "average_df = icu_demo_ce_sorted[icu_demo_ce_sorted.groupby(['subject_id', 'hadm_id', 'stay_id'])['itemid'].cumcount() < 2].groupby(['subject_id', 'hadm_id', 'stay_id','itemid'])['value'].mean().reset_index()\n",
    "\n",
    "# You can also include other columns if needed by merging with the original DataFrame\n",
    "result_df = average_df.merge(icu_demo_ce_sorted[['subject_id', 'hadm_id', 'stay_id', 'age', 'gender', 'los']], on=['subject_id', 'hadm_id', 'stay_id'], how='left')\n",
    "\n",
    "# Drop any duplicate rows (if necessary)\n",
    "result_df = result_df.drop_duplicates()\n",
    "\n",
    "# Print the result\n",
    "result_df.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "item_id = pd.read_csv('icu/d_items.csv.gz')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_df = pd.merge(result_df, item_id[[\"itemid\",\"label\",\"unitname\"]], on=\"itemid\", how=\"inner\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>los</th>\n",
       "      <th>label</th>\n",
       "      <th>unitname</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>39553978</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>92.0</td>\n",
       "      <td>52</td>\n",
       "      <td>F</td>\n",
       "      <td>0.410266</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10000980</td>\n",
       "      <td>26913865</td>\n",
       "      <td>39765666</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>76</td>\n",
       "      <td>F</td>\n",
       "      <td>0.497535</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>88.5</td>\n",
       "      <td>55</td>\n",
       "      <td>F</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10001217</td>\n",
       "      <td>27703517</td>\n",
       "      <td>34592300</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>84.5</td>\n",
       "      <td>55</td>\n",
       "      <td>F</td>\n",
       "      <td>0.948113</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10001725</td>\n",
       "      <td>25563031</td>\n",
       "      <td>31205490</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>70.5</td>\n",
       "      <td>46</td>\n",
       "      <td>F</td>\n",
       "      <td>1.338588</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id   stay_id    itemid  value  age gender       los  \\\n",
       "0    10000032  29079034  39553978  220045.0   92.0   52      F  0.410266   \n",
       "1    10000980  26913865  39765666  220045.0   76.0   76      F  0.497535   \n",
       "2    10001217  24597018  37067082  220045.0   88.5   55      F  1.118032   \n",
       "3    10001217  27703517  34592300  220045.0   84.5   55      F  0.948113   \n",
       "4    10001725  25563031  31205490  220045.0   70.5   46      F  1.338588   \n",
       "\n",
       "        label unitname  \n",
       "0  Heart Rate      bpm  \n",
       "1  Heart Rate      bpm  \n",
       "2  Heart Rate      bpm  \n",
       "3  Heart Rate      bpm  \n",
       "4  Heart Rate      bpm  "
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# hr resutls\n",
    "result_df[result_df['itemid']==220045].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>los</th>\n",
       "      <th>label</th>\n",
       "      <th>unitname</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>67259</th>\n",
       "      <td>10002013</td>\n",
       "      <td>23581541</td>\n",
       "      <td>39060235</td>\n",
       "      <td>220050.0</td>\n",
       "      <td>134.0</td>\n",
       "      <td>57</td>\n",
       "      <td>F</td>\n",
       "      <td>1.314352</td>\n",
       "      <td>Arterial Blood Pressure systolic</td>\n",
       "      <td>mmHg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67260</th>\n",
       "      <td>10002428</td>\n",
       "      <td>23473524</td>\n",
       "      <td>35479615</td>\n",
       "      <td>220050.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>81</td>\n",
       "      <td>F</td>\n",
       "      <td>10.977222</td>\n",
       "      <td>Arterial Blood Pressure systolic</td>\n",
       "      <td>mmHg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67261</th>\n",
       "      <td>10004113</td>\n",
       "      <td>29879900</td>\n",
       "      <td>35200789</td>\n",
       "      <td>220050.0</td>\n",
       "      <td>124.0</td>\n",
       "      <td>33</td>\n",
       "      <td>M</td>\n",
       "      <td>1.060683</td>\n",
       "      <td>Arterial Blood Pressure systolic</td>\n",
       "      <td>mmHg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67262</th>\n",
       "      <td>10004235</td>\n",
       "      <td>24181354</td>\n",
       "      <td>34100191</td>\n",
       "      <td>220050.0</td>\n",
       "      <td>109.0</td>\n",
       "      <td>47</td>\n",
       "      <td>M</td>\n",
       "      <td>4.952106</td>\n",
       "      <td>Arterial Blood Pressure systolic</td>\n",
       "      <td>mmHg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67263</th>\n",
       "      <td>10004422</td>\n",
       "      <td>21255400</td>\n",
       "      <td>32155744</td>\n",
       "      <td>220050.0</td>\n",
       "      <td>105.0</td>\n",
       "      <td>78</td>\n",
       "      <td>M</td>\n",
       "      <td>6.356898</td>\n",
       "      <td>Arterial Blood Pressure systolic</td>\n",
       "      <td>mmHg</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       subject_id   hadm_id   stay_id    itemid  value  age gender        los  \\\n",
       "67259    10002013  23581541  39060235  220050.0  134.0   57      F   1.314352   \n",
       "67260    10002428  23473524  35479615  220050.0  148.0   81      F  10.977222   \n",
       "67261    10004113  29879900  35200789  220050.0  124.0   33      M   1.060683   \n",
       "67262    10004235  24181354  34100191  220050.0  109.0   47      M   4.952106   \n",
       "67263    10004422  21255400  32155744  220050.0  105.0   78      M   6.356898   \n",
       "\n",
       "                                  label unitname  \n",
       "67259  Arterial Blood Pressure systolic     mmHg  \n",
       "67260  Arterial Blood Pressure systolic     mmHg  \n",
       "67261  Arterial Blood Pressure systolic     mmHg  \n",
       "67262  Arterial Blood Pressure systolic     mmHg  \n",
       "67263  Arterial Blood Pressure systolic     mmHg  "
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sbp results\n",
    "result_df[result_df['itemid']==220050].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>los</th>\n",
       "      <th>label</th>\n",
       "      <th>unitname</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>79449</th>\n",
       "      <td>10002013</td>\n",
       "      <td>23581541</td>\n",
       "      <td>39060235</td>\n",
       "      <td>220051.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>57</td>\n",
       "      <td>F</td>\n",
       "      <td>1.314352</td>\n",
       "      <td>Arterial Blood Pressure diastolic</td>\n",
       "      <td>mmHg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79450</th>\n",
       "      <td>10002155</td>\n",
       "      <td>23822395</td>\n",
       "      <td>33685454</td>\n",
       "      <td>220051.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>81</td>\n",
       "      <td>F</td>\n",
       "      <td>6.178912</td>\n",
       "      <td>Arterial Blood Pressure diastolic</td>\n",
       "      <td>mmHg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79451</th>\n",
       "      <td>10002760</td>\n",
       "      <td>28094813</td>\n",
       "      <td>31831386</td>\n",
       "      <td>220051.0</td>\n",
       "      <td>56.0</td>\n",
       "      <td>56</td>\n",
       "      <td>M</td>\n",
       "      <td>1.045868</td>\n",
       "      <td>Arterial Blood Pressure diastolic</td>\n",
       "      <td>mmHg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79452</th>\n",
       "      <td>10003046</td>\n",
       "      <td>26048429</td>\n",
       "      <td>35514836</td>\n",
       "      <td>220051.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>64</td>\n",
       "      <td>M</td>\n",
       "      <td>1.974086</td>\n",
       "      <td>Arterial Blood Pressure diastolic</td>\n",
       "      <td>mmHg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79453</th>\n",
       "      <td>10004422</td>\n",
       "      <td>21255400</td>\n",
       "      <td>32155744</td>\n",
       "      <td>220051.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>78</td>\n",
       "      <td>M</td>\n",
       "      <td>6.356898</td>\n",
       "      <td>Arterial Blood Pressure diastolic</td>\n",
       "      <td>mmHg</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       subject_id   hadm_id   stay_id    itemid  value  age gender       los  \\\n",
       "79449    10002013  23581541  39060235  220051.0   75.0   57      F  1.314352   \n",
       "79450    10002155  23822395  33685454  220051.0   59.0   81      F  6.178912   \n",
       "79451    10002760  28094813  31831386  220051.0   56.0   56      M  1.045868   \n",
       "79452    10003046  26048429  35514836  220051.0   79.0   64      M  1.974086   \n",
       "79453    10004422  21255400  32155744  220051.0   50.0   78      M  6.356898   \n",
       "\n",
       "                                   label unitname  \n",
       "79449  Arterial Blood Pressure diastolic     mmHg  \n",
       "79450  Arterial Blood Pressure diastolic     mmHg  \n",
       "79451  Arterial Blood Pressure diastolic     mmHg  \n",
       "79452  Arterial Blood Pressure diastolic     mmHg  \n",
       "79453  Arterial Blood Pressure diastolic     mmHg  "
      ]
     },
     "execution_count": 178,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# dbp results\n",
    "result_df[result_df['itemid']==220051].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of patients with missing measurements in one encounter:(65445, 5)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>label</th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>Arterial Blood Pressure diastolic</th>\n",
       "      <th>Arterial Blood Pressure systolic</th>\n",
       "      <th>Heart Rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10000032</td>\n",
       "      <td>29079034</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10000980</td>\n",
       "      <td>26913865</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>76.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>88.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10001217</td>\n",
       "      <td>27703517</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>84.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10001725</td>\n",
       "      <td>25563031</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>70.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label  subject_id   hadm_id  Arterial Blood Pressure diastolic  \\\n",
       "0        10000032  29079034                                NaN   \n",
       "1        10000980  26913865                                NaN   \n",
       "2        10001217  24597018                                NaN   \n",
       "3        10001217  27703517                                NaN   \n",
       "4        10001725  25563031                                NaN   \n",
       "\n",
       "label  Arterial Blood Pressure systolic  Heart Rate  \n",
       "0                                   NaN        92.0  \n",
       "1                                   NaN        76.0  \n",
       "2                                   NaN        88.5  \n",
       "3                                   NaN        84.5  \n",
       "4                                   NaN        70.5  "
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pivot the the table to create hr, systolic, and diastolic column for each row\n",
    "icu_demo_ce_pivoted = result_df.pivot_table(index=['subject_id', 'hadm_id'], \\\n",
    "                            columns='label', values='value').reset_index()\n",
    "print(f\"number of patients with missing measurements in one encounter:{icu_demo_ce_pivoted[icu_demo_ce_pivoted.isna().any(axis=1)].shape}\")\n",
    "icu_demo_ce_pivoted[icu_demo_ce_pivoted.isna().any(axis=1)].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of patients with complete measurement in one encounter: (792, 5)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>label</th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>Arterial Blood Pressure diastolic</th>\n",
       "      <th>Arterial Blood Pressure systolic</th>\n",
       "      <th>Heart Rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>207</th>\n",
       "      <td>10029291</td>\n",
       "      <td>22205327</td>\n",
       "      <td>55.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>48.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>313</th>\n",
       "      <td>10048105</td>\n",
       "      <td>28885740</td>\n",
       "      <td>47.0</td>\n",
       "      <td>109.0</td>\n",
       "      <td>93.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>333</th>\n",
       "      <td>10051043</td>\n",
       "      <td>26563181</td>\n",
       "      <td>48.0</td>\n",
       "      <td>122.0</td>\n",
       "      <td>68.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>515</th>\n",
       "      <td>10088198</td>\n",
       "      <td>25815214</td>\n",
       "      <td>50.0</td>\n",
       "      <td>109.0</td>\n",
       "      <td>147.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>619</th>\n",
       "      <td>10104730</td>\n",
       "      <td>23949226</td>\n",
       "      <td>59.0</td>\n",
       "      <td>108.0</td>\n",
       "      <td>148.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label  subject_id   hadm_id  Arterial Blood Pressure diastolic  \\\n",
       "207      10029291  22205327                               55.0   \n",
       "313      10048105  28885740                               47.0   \n",
       "333      10051043  26563181                               48.0   \n",
       "515      10088198  25815214                               50.0   \n",
       "619      10104730  23949226                               59.0   \n",
       "\n",
       "label  Arterial Blood Pressure systolic  Heart Rate  \n",
       "207                                66.0        48.0  \n",
       "313                               109.0        93.5  \n",
       "333                               122.0        68.0  \n",
       "515                               109.0       147.5  \n",
       "619                               108.0       148.5  "
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(f\"number of patients with complete measurement in one encounter: {icu_demo_ce_pivoted[~icu_demo_ce_pivoted.isna().any(axis=1)].shape}\")\n",
    "icu_demo_ce_pivoted[~icu_demo_ce_pivoted.isna().any(axis=1)].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>seq_num</th>\n",
       "      <th>icd_code</th>\n",
       "      <th>icd_version</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10000032</td>\n",
       "      <td>22595853</td>\n",
       "      <td>1</td>\n",
       "      <td>5723</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10000032</td>\n",
       "      <td>22595853</td>\n",
       "      <td>2</td>\n",
       "      <td>78959</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10000032</td>\n",
       "      <td>22595853</td>\n",
       "      <td>3</td>\n",
       "      <td>5715</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10000032</td>\n",
       "      <td>22595853</td>\n",
       "      <td>4</td>\n",
       "      <td>07070</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10000032</td>\n",
       "      <td>22595853</td>\n",
       "      <td>5</td>\n",
       "      <td>496</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id  seq_num icd_code  icd_version\n",
       "0    10000032  22595853        1     5723            9\n",
       "1    10000032  22595853        2    78959            9\n",
       "2    10000032  22595853        3     5715            9\n",
       "3    10000032  22595853        4    07070            9\n",
       "4    10000032  22595853        5      496            9"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Loading dx table\n",
    "dx_df = pd.read_csv('hosp/diagnoses_icd.csv.gz')\n",
    "dx_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>seq_num</th>\n",
       "      <th>icd_code</th>\n",
       "      <th>icd_version</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>232</th>\n",
       "      <td>10000980</td>\n",
       "      <td>20897796</td>\n",
       "      <td>2</td>\n",
       "      <td>I5033</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>252</th>\n",
       "      <td>10000980</td>\n",
       "      <td>24947999</td>\n",
       "      <td>1</td>\n",
       "      <td>42823</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>279</th>\n",
       "      <td>10000980</td>\n",
       "      <td>25242409</td>\n",
       "      <td>7</td>\n",
       "      <td>42832</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>308</th>\n",
       "      <td>10000980</td>\n",
       "      <td>25911675</td>\n",
       "      <td>2</td>\n",
       "      <td>I5023</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>330</th>\n",
       "      <td>10000980</td>\n",
       "      <td>26913865</td>\n",
       "      <td>2</td>\n",
       "      <td>42823</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     subject_id   hadm_id  seq_num icd_code  icd_version\n",
       "232    10000980  20897796        2    I5033           10\n",
       "252    10000980  24947999        1    42823            9\n",
       "279    10000980  25242409        7    42832            9\n",
       "308    10000980  25911675        2    I5023           10\n",
       "330    10000980  26913865        2    42823            9"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Filter hfpef and hfref\n",
    "hfpef_hfref_dx = dx_df[dx_df[\"icd_code\"].str.startswith('4282|4283|I502|I503')]\n",
    "hfpef_hfref_dx.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>seq_num</th>\n",
       "      <th>icd_code</th>\n",
       "      <th>icd_version</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10000032</td>\n",
       "      <td>22595853</td>\n",
       "      <td>1</td>\n",
       "      <td>5723</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10000032</td>\n",
       "      <td>22595853</td>\n",
       "      <td>2</td>\n",
       "      <td>78959</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10000032</td>\n",
       "      <td>22595853</td>\n",
       "      <td>3</td>\n",
       "      <td>5715</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10000032</td>\n",
       "      <td>22595853</td>\n",
       "      <td>4</td>\n",
       "      <td>07070</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10000032</td>\n",
       "      <td>22595853</td>\n",
       "      <td>5</td>\n",
       "      <td>496</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id  seq_num icd_code  icd_version\n",
       "0    10000032  22595853        1     5723            9\n",
       "1    10000032  22595853        2    78959            9\n",
       "2    10000032  22595853        3     5715            9\n",
       "3    10000032  22595853        4    07070            9\n",
       "4    10000032  22595853        5      496            9"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Assuming hfpef_hfref_dx contains 'subject_id' and 'hadm_id' columns\n",
    "non_hf_dx = dx_df[~dx_df[['subject_id', 'hadm_id']].isin(hfpef_hfref_dx[['subject_id', 'hadm_id']]).all(axis=1)]\n",
    "non_hf_dx.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>los</th>\n",
       "      <th>label</th>\n",
       "      <th>unitname</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>88.5</td>\n",
       "      <td>55</td>\n",
       "      <td>F</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10001725</td>\n",
       "      <td>25563031</td>\n",
       "      <td>31205490</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>70.5</td>\n",
       "      <td>46</td>\n",
       "      <td>F</td>\n",
       "      <td>1.338588</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>10001884</td>\n",
       "      <td>26184834</td>\n",
       "      <td>37510196</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>77</td>\n",
       "      <td>F</td>\n",
       "      <td>9.171817</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>10002155</td>\n",
       "      <td>23822395</td>\n",
       "      <td>33685454</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>81</td>\n",
       "      <td>F</td>\n",
       "      <td>6.178912</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>10002155</td>\n",
       "      <td>28994087</td>\n",
       "      <td>31090461</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>92.5</td>\n",
       "      <td>82</td>\n",
       "      <td>F</td>\n",
       "      <td>3.891447</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id   stay_id    itemid  value  age gender       los  \\\n",
       "2    10001217  24597018  37067082  220045.0   88.5   55      F  1.118032   \n",
       "4    10001725  25563031  31205490  220045.0   70.5   46      F  1.338588   \n",
       "5    10001884  26184834  37510196  220045.0   49.0   77      F  9.171817   \n",
       "7    10002155  23822395  33685454  220045.0   68.0   81      F  6.178912   \n",
       "8    10002155  28994087  31090461  220045.0   92.5   82      F  3.891447   \n",
       "\n",
       "        label unitname  \n",
       "2  Heart Rate      bpm  \n",
       "4  Heart Rate      bpm  \n",
       "5  Heart Rate      bpm  \n",
       "7  Heart Rate      bpm  \n",
       "8  Heart Rate      bpm  "
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Filter patients with > 24 hrs of data\n",
    "above_24_hr_result_df = result_df[result_df['los'] > 1]\n",
    "above_24_hr_result_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(72627, 10)\n",
      "42262\n"
     ]
    }
   ],
   "source": [
    "print(above_24_hr_result_df.shape)\n",
    "print(above_24_hr_result_df['subject_id'].nunique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>los</th>\n",
       "      <th>label</th>\n",
       "      <th>unitname</th>\n",
       "      <th>seq_num</th>\n",
       "      <th>icd_code</th>\n",
       "      <th>icd_version</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10002155</td>\n",
       "      <td>23822395</td>\n",
       "      <td>33685454</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>81</td>\n",
       "      <td>F</td>\n",
       "      <td>6.178912</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>3</td>\n",
       "      <td>42821</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10002155</td>\n",
       "      <td>23822395</td>\n",
       "      <td>33685454</td>\n",
       "      <td>220051.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>81</td>\n",
       "      <td>F</td>\n",
       "      <td>6.178912</td>\n",
       "      <td>Arterial Blood Pressure diastolic</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>3</td>\n",
       "      <td>42821</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10002155</td>\n",
       "      <td>28994087</td>\n",
       "      <td>31090461</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>92.5</td>\n",
       "      <td>82</td>\n",
       "      <td>F</td>\n",
       "      <td>3.891447</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>4</td>\n",
       "      <td>42822</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10002430</td>\n",
       "      <td>26295318</td>\n",
       "      <td>38392119</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>96.0</td>\n",
       "      <td>90</td>\n",
       "      <td>M</td>\n",
       "      <td>2.922593</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>I5023</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10003400</td>\n",
       "      <td>23559586</td>\n",
       "      <td>34577403</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>96.0</td>\n",
       "      <td>75</td>\n",
       "      <td>F</td>\n",
       "      <td>2.916701</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>20</td>\n",
       "      <td>I5032</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id   stay_id    itemid  value  age gender       los  \\\n",
       "0    10002155  23822395  33685454  220045.0   68.0   81      F  6.178912   \n",
       "1    10002155  23822395  33685454  220051.0   59.0   81      F  6.178912   \n",
       "2    10002155  28994087  31090461  220045.0   92.5   82      F  3.891447   \n",
       "3    10002430  26295318  38392119  220045.0   96.0   90      M  2.922593   \n",
       "4    10003400  23559586  34577403  220045.0   96.0   75      F  2.916701   \n",
       "\n",
       "                               label unitname  seq_num icd_code  icd_version  \n",
       "0                         Heart Rate      bpm        3    42821            9  \n",
       "1  Arterial Blood Pressure diastolic     mmHg        3    42821            9  \n",
       "2                         Heart Rate      bpm        4    42822            9  \n",
       "3                         Heart Rate      bpm        1    I5023           10  \n",
       "4                         Heart Rate      bpm       20    I5032           10  "
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# merge patients > 24 hrs of lab data with the hf dx\n",
    "hf_above_24_hr_ce_dx = pd.merge(above_24_hr_result_df, hfpef_hfref_dx, on=[\"subject_id\", \"hadm_id\"], how='inner')\n",
    "hf_above_24_hr_ce_dx.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>los</th>\n",
       "      <th>label</th>\n",
       "      <th>unitname</th>\n",
       "      <th>seq_num</th>\n",
       "      <th>icd_code</th>\n",
       "      <th>icd_version</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>88.5</td>\n",
       "      <td>55</td>\n",
       "      <td>F</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>3240</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>88.5</td>\n",
       "      <td>55</td>\n",
       "      <td>F</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>2</td>\n",
       "      <td>3484</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>88.5</td>\n",
       "      <td>55</td>\n",
       "      <td>F</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>3</td>\n",
       "      <td>3485</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>88.5</td>\n",
       "      <td>55</td>\n",
       "      <td>F</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>4</td>\n",
       "      <td>5180</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>88.5</td>\n",
       "      <td>55</td>\n",
       "      <td>F</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>5</td>\n",
       "      <td>340</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id   stay_id    itemid  value  age gender       los  \\\n",
       "0    10001217  24597018  37067082  220045.0   88.5   55      F  1.118032   \n",
       "1    10001217  24597018  37067082  220045.0   88.5   55      F  1.118032   \n",
       "2    10001217  24597018  37067082  220045.0   88.5   55      F  1.118032   \n",
       "3    10001217  24597018  37067082  220045.0   88.5   55      F  1.118032   \n",
       "4    10001217  24597018  37067082  220045.0   88.5   55      F  1.118032   \n",
       "\n",
       "        label unitname  seq_num icd_code  icd_version  \n",
       "0  Heart Rate      bpm        1     3240            9  \n",
       "1  Heart Rate      bpm        2     3484            9  \n",
       "2  Heart Rate      bpm        3     3485            9  \n",
       "3  Heart Rate      bpm        4     5180            9  \n",
       "4  Heart Rate      bpm        5      340            9  "
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# merge patients > 24 hrs of lab data with the non-hf dx\n",
    "non_hf_above_24_hr_ce_dx = pd.merge(above_24_hr_result_df, non_hf_dx, on=[\"subject_id\", \"hadm_id\"], how='inner')\n",
    "non_hf_above_24_hr_ce_dx.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>itemid</th>\n",
       "      <th>value</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>los</th>\n",
       "      <th>label</th>\n",
       "      <th>unitname</th>\n",
       "      <th>seq_num</th>\n",
       "      <th>icd_code</th>\n",
       "      <th>icd_version</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>88.5</td>\n",
       "      <td>55</td>\n",
       "      <td>F</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>3240</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10001725</td>\n",
       "      <td>25563031</td>\n",
       "      <td>31205490</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>70.5</td>\n",
       "      <td>46</td>\n",
       "      <td>F</td>\n",
       "      <td>1.338588</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>78829</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>10001884</td>\n",
       "      <td>26184834</td>\n",
       "      <td>37510196</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>77</td>\n",
       "      <td>F</td>\n",
       "      <td>9.171817</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>J441</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>10002155</td>\n",
       "      <td>23822395</td>\n",
       "      <td>33685454</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>81</td>\n",
       "      <td>F</td>\n",
       "      <td>6.178912</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>41011</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102</th>\n",
       "      <td>10002155</td>\n",
       "      <td>28994087</td>\n",
       "      <td>31090461</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>92.5</td>\n",
       "      <td>82</td>\n",
       "      <td>F</td>\n",
       "      <td>3.891447</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>486</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     subject_id   hadm_id   stay_id    itemid  value  age gender       los  \\\n",
       "0      10001217  24597018  37067082  220045.0   88.5   55      F  1.118032   \n",
       "10     10001725  25563031  31205490  220045.0   70.5   46      F  1.338588   \n",
       "28     10001884  26184834  37510196  220045.0   49.0   77      F  9.171817   \n",
       "64     10002155  23822395  33685454  220045.0   68.0   81      F  6.178912   \n",
       "102    10002155  28994087  31090461  220045.0   92.5   82      F  3.891447   \n",
       "\n",
       "          label unitname  seq_num icd_code  icd_version  \n",
       "0    Heart Rate      bpm        1     3240            9  \n",
       "10   Heart Rate      bpm        1    78829            9  \n",
       "28   Heart Rate      bpm        1     J441           10  \n",
       "64   Heart Rate      bpm        1    41011            9  \n",
       "102  Heart Rate      bpm        1      486            9  "
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Drop duplicates\n",
    "non_hf_above_24_hr_ce_dx.drop_duplicates(subset=['subject_id', 'hadm_id', 'stay_id'], keep='first', inplace=True)\n",
    "non_hf_above_24_hr_ce_dx.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loading and displaying item id within lab table\n",
    "lab_id = pd.read_csv('hosp/d_labitems.csv.gz')\n",
    "# Loading lab table\n",
    "lab_df = pd.read_csv(\"hosp/labevents.csv.gz\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>itemid</th>\n",
       "      <th>label</th>\n",
       "      <th>fluid</th>\n",
       "      <th>category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>160</th>\n",
       "      <td>50963</td>\n",
       "      <td>NTproBNP</td>\n",
       "      <td>Blood</td>\n",
       "      <td>Chemistry</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     itemid     label  fluid   category\n",
       "160   50963  NTproBNP  Blood  Chemistry"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lab_id[lab_id['itemid']==50963].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>labevent_id</th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>specimen_id</th>\n",
       "      <th>itemid</th>\n",
       "      <th>order_provider_id</th>\n",
       "      <th>charttime</th>\n",
       "      <th>storetime</th>\n",
       "      <th>value</th>\n",
       "      <th>valuenum</th>\n",
       "      <th>valueuom</th>\n",
       "      <th>ref_range_lower</th>\n",
       "      <th>ref_range_upper</th>\n",
       "      <th>flag</th>\n",
       "      <th>priority</th>\n",
       "      <th>comments</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>10000032</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52958335</td>\n",
       "      <td>50904</td>\n",
       "      <td>P28Z0X</td>\n",
       "      <td>2180-03-23 11:51:00</td>\n",
       "      <td>2180-03-23 16:40:00</td>\n",
       "      <td>52</td>\n",
       "      <td>52.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ROUTINE</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>10000032</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52958335</td>\n",
       "      <td>50907</td>\n",
       "      <td>P28Z0X</td>\n",
       "      <td>2180-03-23 11:51:00</td>\n",
       "      <td>2180-03-23 16:40:00</td>\n",
       "      <td>202</td>\n",
       "      <td>202.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>0.0</td>\n",
       "      <td>199.0</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>ROUTINE</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>60</td>\n",
       "      <td>10000032</td>\n",
       "      <td>NaN</td>\n",
       "      <td>73913913</td>\n",
       "      <td>50931</td>\n",
       "      <td>P28Z0X</td>\n",
       "      <td>2180-03-23 11:51:00</td>\n",
       "      <td>2180-03-23 15:56:00</td>\n",
       "      <td>___</td>\n",
       "      <td>95.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>70.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ROUTINE</td>\n",
       "      <td>IF FASTING, 70-100 NORMAL, &gt;125 PROVISIONAL DI...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>103</th>\n",
       "      <td>104</td>\n",
       "      <td>10000032</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43430170</td>\n",
       "      <td>50931</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2180-05-06 22:25:00</td>\n",
       "      <td>2180-05-06 23:16:00</td>\n",
       "      <td>___</td>\n",
       "      <td>109.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>70.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>STAT</td>\n",
       "      <td>IF FASTING, 70-100 NORMAL, &gt;125 PROVISIONAL DI...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>151</td>\n",
       "      <td>10000032</td>\n",
       "      <td>22595853.0</td>\n",
       "      <td>93044861</td>\n",
       "      <td>50931</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2180-05-07 05:05:00</td>\n",
       "      <td>2180-05-07 07:03:00</td>\n",
       "      <td>___</td>\n",
       "      <td>99.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>70.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ROUTINE</td>\n",
       "      <td>IF FASTING, 70-100 NORMAL, &gt;125 PROVISIONAL DI...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     labevent_id  subject_id     hadm_id  specimen_id  itemid  \\\n",
       "15            16    10000032         NaN     52958335   50904   \n",
       "17            18    10000032         NaN     52958335   50907   \n",
       "59            60    10000032         NaN     73913913   50931   \n",
       "103          104    10000032         NaN     43430170   50931   \n",
       "150          151    10000032  22595853.0     93044861   50931   \n",
       "\n",
       "    order_provider_id            charttime            storetime value  \\\n",
       "15             P28Z0X  2180-03-23 11:51:00  2180-03-23 16:40:00    52   \n",
       "17             P28Z0X  2180-03-23 11:51:00  2180-03-23 16:40:00   202   \n",
       "59             P28Z0X  2180-03-23 11:51:00  2180-03-23 15:56:00   ___   \n",
       "103               NaN  2180-05-06 22:25:00  2180-05-06 23:16:00   ___   \n",
       "150               NaN  2180-05-07 05:05:00  2180-05-07 07:03:00   ___   \n",
       "\n",
       "     valuenum valueuom  ref_range_lower  ref_range_upper      flag priority  \\\n",
       "15       52.0    mg/dL              NaN              NaN       NaN  ROUTINE   \n",
       "17      202.0    mg/dL              0.0            199.0  abnormal  ROUTINE   \n",
       "59       95.0    mg/dL             70.0            100.0       NaN  ROUTINE   \n",
       "103     109.0    mg/dL             70.0            100.0  abnormal     STAT   \n",
       "150      99.0    mg/dL             70.0            100.0       NaN  ROUTINE   \n",
       "\n",
       "                                              comments  \n",
       "15                                                 NaN  \n",
       "17                                                 NaN  \n",
       "59   IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI...  \n",
       "103  IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI...  \n",
       "150  IF FASTING, 70-100 NORMAL, >125 PROVISIONAL DI...  "
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# HDL: 50904, total cholesterol : 50907, glucose : 51478, bnp =50963\n",
    "bnp_hdl_chol_glu_lab = lab_df[lab_df[\"itemid\"].isin([50904,50907,50931, 50963])]\n",
    "bnp_hdl_chol_glu_lab.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3311010, 16)"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bnp_hdl_chol_glu_lab.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1409165"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(bnp_hdl_chol_glu_lab['hadm_id'].isna())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>itemid_ce</th>\n",
       "      <th>value</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>los</th>\n",
       "      <th>label</th>\n",
       "      <th>unitname</th>\n",
       "      <th>seq_num</th>\n",
       "      <th>icd_code</th>\n",
       "      <th>icd_version</th>\n",
       "      <th>itemid_lab</th>\n",
       "      <th>valuenum</th>\n",
       "      <th>valueuom</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>88.5</td>\n",
       "      <td>55</td>\n",
       "      <td>F</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>3240</td>\n",
       "      <td>9</td>\n",
       "      <td>50931</td>\n",
       "      <td>112.0</td>\n",
       "      <td>mg/dL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>10001725</td>\n",
       "      <td>25563031</td>\n",
       "      <td>31205490</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>70.5</td>\n",
       "      <td>46</td>\n",
       "      <td>F</td>\n",
       "      <td>1.338588</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>78829</td>\n",
       "      <td>9</td>\n",
       "      <td>50931</td>\n",
       "      <td>146.0</td>\n",
       "      <td>mg/dL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10001884</td>\n",
       "      <td>26184834</td>\n",
       "      <td>37510196</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>77</td>\n",
       "      <td>F</td>\n",
       "      <td>9.171817</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>J441</td>\n",
       "      <td>10</td>\n",
       "      <td>50931</td>\n",
       "      <td>128.0</td>\n",
       "      <td>mg/dL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>10002155</td>\n",
       "      <td>23822395</td>\n",
       "      <td>33685454</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>81</td>\n",
       "      <td>F</td>\n",
       "      <td>6.178912</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>41011</td>\n",
       "      <td>9</td>\n",
       "      <td>50931</td>\n",
       "      <td>95.0</td>\n",
       "      <td>mg/dL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>10002155</td>\n",
       "      <td>28994087</td>\n",
       "      <td>31090461</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>92.5</td>\n",
       "      <td>82</td>\n",
       "      <td>F</td>\n",
       "      <td>3.891447</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>486</td>\n",
       "      <td>9</td>\n",
       "      <td>50931</td>\n",
       "      <td>106.0</td>\n",
       "      <td>mg/dL</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    subject_id   hadm_id   stay_id  itemid_ce  value  age gender       los  \\\n",
       "0     10001217  24597018  37067082   220045.0   88.5   55      F  1.118032   \n",
       "5     10001725  25563031  31205490   220045.0   70.5   46      F  1.338588   \n",
       "9     10001884  26184834  37510196   220045.0   49.0   77      F  9.171817   \n",
       "27    10002155  23822395  33685454   220045.0   68.0   81      F  6.178912   \n",
       "43    10002155  28994087  31090461   220045.0   92.5   82      F  3.891447   \n",
       "\n",
       "         label unitname  seq_num icd_code  icd_version  itemid_lab  valuenum  \\\n",
       "0   Heart Rate      bpm        1     3240            9       50931     112.0   \n",
       "5   Heart Rate      bpm        1    78829            9       50931     146.0   \n",
       "9   Heart Rate      bpm        1     J441           10       50931     128.0   \n",
       "27  Heart Rate      bpm        1    41011            9       50931      95.0   \n",
       "43  Heart Rate      bpm        1      486            9       50931     106.0   \n",
       "\n",
       "   valueuom  \n",
       "0     mg/dL  \n",
       "5     mg/dL  \n",
       "9     mg/dL  \n",
       "27    mg/dL  \n",
       "43    mg/dL  "
      ]
     },
     "execution_count": 188,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "non_hf_ce_lab = pd.merge(non_hf_above_24_hr_ce_dx, bnp_hdl_chol_glu_lab[[\"subject_id\",\"hadm_id\",\"itemid\",\"valuenum\", \"valueuom\"]], \\\n",
    "                         on=[\"subject_id\",\"hadm_id\"], how=\"inner\", suffixes=(\"_ce\",\"_lab\"))\n",
    "non_hf_ce_lab.drop_duplicates(subset=['subject_id', 'hadm_id', 'stay_id','itemid_ce','itemid_lab'], keep='first', inplace=True)\n",
    "non_hf_ce_lab.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "shape (77328, 18)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>itemid_ce</th>\n",
       "      <th>value</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>los</th>\n",
       "      <th>label_ce</th>\n",
       "      <th>unitname</th>\n",
       "      <th>seq_num</th>\n",
       "      <th>icd_code</th>\n",
       "      <th>icd_version</th>\n",
       "      <th>itemid_lab</th>\n",
       "      <th>valuenum</th>\n",
       "      <th>valueuom</th>\n",
       "      <th>itemid</th>\n",
       "      <th>label_lab</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10001217</td>\n",
       "      <td>24597018</td>\n",
       "      <td>37067082</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>88.5</td>\n",
       "      <td>55</td>\n",
       "      <td>F</td>\n",
       "      <td>1.118032</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>3240</td>\n",
       "      <td>9</td>\n",
       "      <td>50931</td>\n",
       "      <td>112.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>50931</td>\n",
       "      <td>Glucose</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10001725</td>\n",
       "      <td>25563031</td>\n",
       "      <td>31205490</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>70.5</td>\n",
       "      <td>46</td>\n",
       "      <td>F</td>\n",
       "      <td>1.338588</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>78829</td>\n",
       "      <td>9</td>\n",
       "      <td>50931</td>\n",
       "      <td>146.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>50931</td>\n",
       "      <td>Glucose</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10001884</td>\n",
       "      <td>26184834</td>\n",
       "      <td>37510196</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>77</td>\n",
       "      <td>F</td>\n",
       "      <td>9.171817</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>J441</td>\n",
       "      <td>10</td>\n",
       "      <td>50931</td>\n",
       "      <td>128.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>50931</td>\n",
       "      <td>Glucose</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10002155</td>\n",
       "      <td>23822395</td>\n",
       "      <td>33685454</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>81</td>\n",
       "      <td>F</td>\n",
       "      <td>6.178912</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>41011</td>\n",
       "      <td>9</td>\n",
       "      <td>50931</td>\n",
       "      <td>95.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>50931</td>\n",
       "      <td>Glucose</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10002155</td>\n",
       "      <td>28994087</td>\n",
       "      <td>31090461</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>92.5</td>\n",
       "      <td>82</td>\n",
       "      <td>F</td>\n",
       "      <td>3.891447</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>486</td>\n",
       "      <td>9</td>\n",
       "      <td>50931</td>\n",
       "      <td>106.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>50931</td>\n",
       "      <td>Glucose</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id   stay_id  itemid_ce  value  age gender       los  \\\n",
       "0    10001217  24597018  37067082   220045.0   88.5   55      F  1.118032   \n",
       "1    10001725  25563031  31205490   220045.0   70.5   46      F  1.338588   \n",
       "2    10001884  26184834  37510196   220045.0   49.0   77      F  9.171817   \n",
       "3    10002155  23822395  33685454   220045.0   68.0   81      F  6.178912   \n",
       "4    10002155  28994087  31090461   220045.0   92.5   82      F  3.891447   \n",
       "\n",
       "     label_ce unitname  seq_num icd_code  icd_version  itemid_lab  valuenum  \\\n",
       "0  Heart Rate      bpm        1     3240            9       50931     112.0   \n",
       "1  Heart Rate      bpm        1    78829            9       50931     146.0   \n",
       "2  Heart Rate      bpm        1     J441           10       50931     128.0   \n",
       "3  Heart Rate      bpm        1    41011            9       50931      95.0   \n",
       "4  Heart Rate      bpm        1      486            9       50931     106.0   \n",
       "\n",
       "  valueuom  itemid label_lab  \n",
       "0    mg/dL   50931   Glucose  \n",
       "1    mg/dL   50931   Glucose  \n",
       "2    mg/dL   50931   Glucose  \n",
       "3    mg/dL   50931   Glucose  \n",
       "4    mg/dL   50931   Glucose  "
      ]
     },
     "execution_count": 189,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "non_hf_ce_lab = pd.merge(non_hf_ce_lab, lab_id[[\"itemid\", \"label\"]], left_on=\"itemid_lab\", right_on=\"itemid\", how=\"inner\", suffixes=(\"_ce\", \"_lab\"))\n",
    "print(f\"shape {non_hf_ce_lab.shape}\")\n",
    "non_hf_ce_lab.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "shape (24584, 16)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>itemid_ce</th>\n",
       "      <th>value</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>los</th>\n",
       "      <th>label</th>\n",
       "      <th>unitname</th>\n",
       "      <th>seq_num</th>\n",
       "      <th>icd_code</th>\n",
       "      <th>icd_version</th>\n",
       "      <th>itemid_lab</th>\n",
       "      <th>valuenum</th>\n",
       "      <th>valueuom</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10002155</td>\n",
       "      <td>23822395</td>\n",
       "      <td>33685454</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>81</td>\n",
       "      <td>F</td>\n",
       "      <td>6.178912</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>3</td>\n",
       "      <td>42821</td>\n",
       "      <td>9</td>\n",
       "      <td>50931.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>mg/dL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>10002155</td>\n",
       "      <td>23822395</td>\n",
       "      <td>33685454</td>\n",
       "      <td>220051.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>81</td>\n",
       "      <td>F</td>\n",
       "      <td>6.178912</td>\n",
       "      <td>Arterial Blood Pressure diastolic</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>3</td>\n",
       "      <td>42821</td>\n",
       "      <td>9</td>\n",
       "      <td>50931.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>mg/dL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>10002155</td>\n",
       "      <td>28994087</td>\n",
       "      <td>31090461</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>92.5</td>\n",
       "      <td>82</td>\n",
       "      <td>F</td>\n",
       "      <td>3.891447</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>4</td>\n",
       "      <td>42822</td>\n",
       "      <td>9</td>\n",
       "      <td>50931.0</td>\n",
       "      <td>106.0</td>\n",
       "      <td>mg/dL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>10002430</td>\n",
       "      <td>26295318</td>\n",
       "      <td>38392119</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>96.0</td>\n",
       "      <td>90</td>\n",
       "      <td>M</td>\n",
       "      <td>2.922593</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>I5023</td>\n",
       "      <td>10</td>\n",
       "      <td>50931.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>mg/dL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>10002430</td>\n",
       "      <td>26295318</td>\n",
       "      <td>38392119</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>96.0</td>\n",
       "      <td>90</td>\n",
       "      <td>M</td>\n",
       "      <td>2.922593</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>I5023</td>\n",
       "      <td>10</td>\n",
       "      <td>50963.0</td>\n",
       "      <td>23132.0</td>\n",
       "      <td>pg/mL</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    subject_id   hadm_id   stay_id  itemid_ce  value  age gender       los  \\\n",
       "0     10002155  23822395  33685454   220045.0   68.0   81      F  6.178912   \n",
       "16    10002155  23822395  33685454   220051.0   59.0   81      F  6.178912   \n",
       "32    10002155  28994087  31090461   220045.0   92.5   82      F  3.891447   \n",
       "37    10002430  26295318  38392119   220045.0   96.0   90      M  2.922593   \n",
       "38    10002430  26295318  38392119   220045.0   96.0   90      M  2.922593   \n",
       "\n",
       "                                label unitname  seq_num icd_code  icd_version  \\\n",
       "0                          Heart Rate      bpm        3    42821            9   \n",
       "16  Arterial Blood Pressure diastolic     mmHg        3    42821            9   \n",
       "32                         Heart Rate      bpm        4    42822            9   \n",
       "37                         Heart Rate      bpm        1    I5023           10   \n",
       "38                         Heart Rate      bpm        1    I5023           10   \n",
       "\n",
       "    itemid_lab  valuenum valueuom  \n",
       "0      50931.0      95.0    mg/dL  \n",
       "16     50931.0      95.0    mg/dL  \n",
       "32     50931.0     106.0    mg/dL  \n",
       "37     50931.0     128.0    mg/dL  \n",
       "38     50963.0   23132.0    pg/mL  "
      ]
     },
     "execution_count": 190,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hf_ce_lab = pd.merge(hf_above_24_hr_ce_dx, bnp_hdl_chol_glu_lab[[\"subject_id\",\"hadm_id\",\"itemid\",\"valuenum\", \"valueuom\"]], \\\n",
    "                         on=[\"subject_id\",\"hadm_id\"], how=\"left\", suffixes=(\"_ce\",\"_lab\"))\n",
    "hf_ce_lab.drop_duplicates(subset=['subject_id', 'hadm_id', 'stay_id','itemid_ce','itemid_lab'], keep='first', inplace=True)\n",
    "print(f\"shape {hf_ce_lab.shape}\")\n",
    "hf_ce_lab.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 191,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>itemid_ce</th>\n",
       "      <th>value</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>los</th>\n",
       "      <th>label_ce</th>\n",
       "      <th>unitname</th>\n",
       "      <th>seq_num</th>\n",
       "      <th>icd_code</th>\n",
       "      <th>icd_version</th>\n",
       "      <th>itemid_lab</th>\n",
       "      <th>valuenum</th>\n",
       "      <th>valueuom</th>\n",
       "      <th>itemid</th>\n",
       "      <th>label_lab</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10002155</td>\n",
       "      <td>23822395</td>\n",
       "      <td>33685454</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>81</td>\n",
       "      <td>F</td>\n",
       "      <td>6.178912</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>3</td>\n",
       "      <td>42821</td>\n",
       "      <td>9</td>\n",
       "      <td>50931.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>50931</td>\n",
       "      <td>Glucose</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10002155</td>\n",
       "      <td>23822395</td>\n",
       "      <td>33685454</td>\n",
       "      <td>220051.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>81</td>\n",
       "      <td>F</td>\n",
       "      <td>6.178912</td>\n",
       "      <td>Arterial Blood Pressure diastolic</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>3</td>\n",
       "      <td>42821</td>\n",
       "      <td>9</td>\n",
       "      <td>50931.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>50931</td>\n",
       "      <td>Glucose</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10002155</td>\n",
       "      <td>28994087</td>\n",
       "      <td>31090461</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>92.5</td>\n",
       "      <td>82</td>\n",
       "      <td>F</td>\n",
       "      <td>3.891447</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>4</td>\n",
       "      <td>42822</td>\n",
       "      <td>9</td>\n",
       "      <td>50931.0</td>\n",
       "      <td>106.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>50931</td>\n",
       "      <td>Glucose</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10002430</td>\n",
       "      <td>26295318</td>\n",
       "      <td>38392119</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>96.0</td>\n",
       "      <td>90</td>\n",
       "      <td>M</td>\n",
       "      <td>2.922593</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>1</td>\n",
       "      <td>I5023</td>\n",
       "      <td>10</td>\n",
       "      <td>50931.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>50931</td>\n",
       "      <td>Glucose</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10003400</td>\n",
       "      <td>23559586</td>\n",
       "      <td>34577403</td>\n",
       "      <td>220045.0</td>\n",
       "      <td>96.0</td>\n",
       "      <td>75</td>\n",
       "      <td>F</td>\n",
       "      <td>2.916701</td>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>bpm</td>\n",
       "      <td>20</td>\n",
       "      <td>I5032</td>\n",
       "      <td>10</td>\n",
       "      <td>50931.0</td>\n",
       "      <td>101.0</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>50931</td>\n",
       "      <td>Glucose</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   subject_id   hadm_id   stay_id  itemid_ce  value  age gender       los  \\\n",
       "0    10002155  23822395  33685454   220045.0   68.0   81      F  6.178912   \n",
       "1    10002155  23822395  33685454   220051.0   59.0   81      F  6.178912   \n",
       "2    10002155  28994087  31090461   220045.0   92.5   82      F  3.891447   \n",
       "3    10002430  26295318  38392119   220045.0   96.0   90      M  2.922593   \n",
       "4    10003400  23559586  34577403   220045.0   96.0   75      F  2.916701   \n",
       "\n",
       "                            label_ce unitname  seq_num icd_code  icd_version  \\\n",
       "0                         Heart Rate      bpm        3    42821            9   \n",
       "1  Arterial Blood Pressure diastolic     mmHg        3    42821            9   \n",
       "2                         Heart Rate      bpm        4    42822            9   \n",
       "3                         Heart Rate      bpm        1    I5023           10   \n",
       "4                         Heart Rate      bpm       20    I5032           10   \n",
       "\n",
       "   itemid_lab  valuenum valueuom  itemid label_lab  \n",
       "0     50931.0      95.0    mg/dL   50931   Glucose  \n",
       "1     50931.0      95.0    mg/dL   50931   Glucose  \n",
       "2     50931.0     106.0    mg/dL   50931   Glucose  \n",
       "3     50931.0     128.0    mg/dL   50931   Glucose  \n",
       "4     50931.0     101.0    mg/dL   50931   Glucose  "
      ]
     },
     "execution_count": 191,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# HDL: 50904, total cholesterol : 50907, glucose : 51478, bnp =50963, HR = 20045, SBP = 20050, DBP = 20051\n",
    "hf_ce_lab = pd.merge(hf_ce_lab, lab_id[[\"itemid\", \"label\"]], left_on=\"itemid_lab\", right_on=\"itemid\", how=\"inner\", suffixes=(\"_ce\", \"_lab\"))\n",
    "hf_ce_lab.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Glucose               16401\n",
       "NTproBNP               4283\n",
       "Cholesterol, Total     1973\n",
       "Cholesterol, HDL       1911\n",
       "Name: label_lab, dtype: int64"
      ]
     },
     "execution_count": 201,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hf_ce_lab[\"label_lab\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assuming hf_ce_lab is your DataFrame\n",
    "hf_ce_lab['dx'] = np.where(hf_ce_lab['icd_code'].str.startswith(('I502', '4282')), 'HFrEF', 'HFpEF')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 205,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAsgAAAGoCAYAAABbtxOxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAmh0lEQVR4nO3de7xVdZ3/8dcnEUPNC3okLiplGCgqGnmZGmsiHHPyVmqhFRrG5HSdX01S/fplTc4w00xZaWNWJpNlaWpQaRPRmJWmgWJ5Y/CWKASoIAqVI35+f6wFfj0cztkHWWdzeT0fj/3Ye631XWt91jqbw/t893etHZmJJEmSpMoL2l2AJEmStCkxIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiQ1ICIujIhPFNNnRcTiiHgyInarn1+6gdvOiHjZepZdFxFnbmjdm7OIeG1EPLQJ1DEgIn4QEY9HxBURcVpE/KTddUlqnQFZ2gpExAMR8VRE7N5p/tw6bA1vU10viYhnIuLLDe9nUkTcHRFP1CH1RxHxonrZJRHxmee5/dMj4pflvMx8d2b+Y718W+BzwFGZuWNmPlo/3/d89tukiBhevzf6FfMOjYhrImJ5RDwWETdHxBn1stfWP8sn68dDEXF5RLyyh/30j4hzImJ+RKys36sX9/V7sn4fPFXX/lhEzIyIkRu4uZOAQcBumXlyZn4rM4/aiOVKapgBWdp63A9MWDMREQcAA9pXDgDvAJYBb42I7ZrYQUS8BvgnYEJmvggYBVzei/X79dyqR4OAFwJ3bIRttUVEHAH8DPg58DJgN+As4A1Fs4WZuSPwIuBw4G7gFxExrptNfw84DjgV2Bk4CJgDdLdOU/61rn8YsAS4pHODqPT0f+fewP9k5tMbv0RJfcGALG09vkkVSNeYCPxn2SAitouIf4uIB+ue1gsjYkC9bNeI+GFELI2IZfXrYcW610XEP0bEr+qe2p907rHuwjuA/wv8L3Bsp1qOioh59cfUX46In5dDByLinRFxV13Lf0XE3uvZxyuBGzPzVoDMfCwzp2XmExExGTgN+Ejdc/iDetsPRMTZEfFbYGVE9IuIKRFxb31sd0bEiXXbUcCFwBH1NpbX8y+JiM9ExL7AvLqW5RHxs3r52mES3Z33evk/RMSiiFgYEe/s4ZwC7FP37j4eEdMjYmC9nR9FxPs6neffRsQJLWzzs8C0zPyXzHwkK3My85TODetlD2Xm/wO+BvxLVxuMiNcD44HjM/M3mfl0Zj6emRdk5tfrNkMiYkbdq3tPRLyrWH9AfZ6XRcSdVD/rcvtDIuLK+j17f0S8v4XjJDNXAd8GRtfbuS4izo2IXwGrgJdGxMi6l/mx+n16St32U8D/A95Svx8mRfEJQ0T8RUQ8EhF71tMHRdUjP7KefmNUn+wsj4gbIuLA4njOjoiH6/fgvB7+8JD0fGSmDx8+tvAH8ADweqqgNgrYBlhA1dOVwPC63XnADGAgVS/gD4B/rpftBrwZ2L5edgXw/WIf1wH3AvtS9UxfB0ztpqa/BP4M7Ap8CZhRLNsdWAG8CegHfIAqRJ9ZLz8BuKc+ln5UIfuGbvbzR+BTwKuA7TotvwT4TBfnay6wJzCgnncyMISqY+EtwEpgcL3sdOCX69suMLw+z/2K5Qm8rIXzfjSwmCqs7UAV3Nau28XxXgc8XLS/Eri0XnYKcFPR9iDgUaB/F9tZW3P9M18N/FU3P8/XAg91Mf91wDPADl0smwr8vIf37s+BL1P1wI8BlgLjivV/UZ+3PYHb19RQ/5zmUIXV/sBLgfuAv17Pfsqf1471ef5FcU4fBPavz8fOVP9+zqinDwEeAfav25+z5px39f4AzqXqjR8A/BZ4bz3/EKqe68Oo/o1OpHovbge8vN7nkOLns0+7f7f48LGlPuxBlrYua3qRx1N9/P3wmgUREcC7gL/Pqpf1CaqhCW8FyGrc7JWZ
uapedi7wmk7b/0Zm/k9m/pFqGMOYbmqZCFybmcuowsgbImKPetkxwB2ZeVVWH1N/EfhDse7fUgXIu+rl/wSM6aoXOTN/QRW0DwF+BDwaEZ+LiG26P1V8MTMX1MdCZl6RmQsz85nM/C4wHzi0h230qKfzThVqv5GZt2fmSqrw1ZNvFu0/AZxSH+90YEREjKjbvR34bmY+1cP2dqUKnIt6c2y1hUAAu3SxbLfutln3sr4aODsz/5SZc6l6pN9eNzkFOLc+bwuo3idrvBLoyMxPZ+ZTWY33/irPnteufLj+BOAeqpB8erHsksy8o36/HQ08kJnfyKrX+xaqP0RO6mbbpXOoQvbNVOfngnr+u4CvZOZNmbk6M6dR/RF5ONUfKNsB+0XEtpn5QGbe2+L+JPWSAVnaunyTaqzn6XQaXgF0UPUUzqk/3l0O/LieT0RsHxFfiYjfR8QK4Hpgl05Bswyxq6hCxjrq4QMnA98CyMwbqXroTq2bDKHqLaNenkB5d4K9gS8UdT5GFcKGdrW/zLw2M4+l6mk8vj7+nu70sKCciIh3FB99L6fqoe1pCEkruj3vdDoXwO9b2Gbn9tsCu2fmn6n+cHlbVONoJ1C9J3qyjKoXeHALbTsbStUTvbyLZY/2sM0hwJo/Gtb4Pc/+nLs7N3sDQ9ac0/q8foxqPPj6/Ftm7pKZL87M4zoF0HI/ewOHddr2acCLu9n2Wpn5v1Q91qOBf6/f32u2+6FO292Tqtf4HuCDVOF6SUR8JyKGtLI/Sb1nQJa2Ipn5e6qL9Y4Bruq0+BGqoQj71yFhl8zcOauLlgA+RPUx72GZuRNwZD0/NqCUE4GdgC9HxB8i4g9UoWfNGOlFVBdKVTuoelmHFesvAP62qHOXzByQmTd0t9O693cW1cfbo9fMXl/zYv97U/U+vpfqzgS7UH2cH53bboCezvsiqpC0xl4tbLNz+/+t9wMwjSrMjQNW1X+cdCurMbk3Ug2x6a0TgVvq3uzOfgocGsVY9k4WAgOjvuNIbS+e/eSju3OzALi/03vkRZl5zAYcAzz3Z7yAamhIue0dM/OsVjYUEUOBTwLfAP49nr1AdQFVj3i53e0z8zKAzPx2Zr6aZ4dGdTm2W9LzZ0CWtj6TgNd1DiyZ+QxVCPz8mqEOETE0Iv66bvIiqiC3vL7o65PPo4aJwMXAAVTDMMZQjQ8eE9XdNX4EHBARJ0R1F4n38NzeuQuBj0bE/nWdO0fEyV3tKCKOj4i3RnWRYUTEoVRDQ35dN1lMNT61OztQBZKl9TbP4NmAvWYbwyKifysHX2rhvF8OnB4R+0XE9rR23t9WtP808L3MXF3v70aq3uB/p7Xe4zU+UtfxDxGxW13nQRHxnc4N6/M8NCI+SdVT/7GuNpiZPwVmAldHxCuiuhjyRRHx7oh4Zz1s4gbgnyPihfUFa5OoP3mgOjcfrX+2w4DyAsSbgRX1hW0DImKbiBgdPdx2rkU/BPaNiLdHxLb145VRXbDZrfqPvUuAr9fHsgj4x3rxV4F3R8Rh9TncISL+pj4nL4+I19Vh+k9U/xZXb4RjkdQFA7K0lcnMezNz9noWn001/vLX9TCKn1L1GkN1IdkAqp7IX1MNA+i1uvdsHHBeZv6heMyptzkxMx+hGoLxr1Qfw+8HzKYaj0lmXk3Ve/adus7bee7txkrLqMZ2zqe68O9S4LOZuSZkfZ1qXOfyiPh+VxvIzDupAuWNVGH4AOBXRZOfUd3C7Q8R8ci6W+jRes97Zl5Lde5/Vrf5WQvb+yZVCPsD1cVtne/e8J/1MVzaaoF17/zr6sd9EfEYcBFwTdFsSEQ8CTwJ/Kbex2szs7svyTip3sZ3gcepfpZjqc4BVMNAhlP1Jl8NfDIzZ9bLPkU1rOJ+4CcUgb/+g+BYqj++7qd6336Nauzv81IP+TiKajzzQqrz/C9UY4R78n6qYR6fqIdWnAGcERF/Wf+7fBdwPtX79h6eHQe9HdVFiY/U+9uD9fzhIen5
i2eHPknSpqkeL/sQcFpm/ne769ncRcQ7gMn1x/WSpE7sQZa0SYqIv46IXeqPlD9GNd731z2sph7Uwy7+jqr3V5LUBQOypE3VEVT3VX6E6qPyE9bcck0bph7XvJRqmMi321yOJG2yHGIhSZIkFexBliRJkgr92l1AK3bfffccPnx4u8uQJEnSFmTOnDmPZGZH5/mbRUAePnw4s2ev765UkiRJUu9FRJffTuoQC0mSJKlgQJYkSZIKBmRJkiSpYECWJEmSCgZkSZIkqWBAliRJkgoGZEmSJKlgQJaAefPmMWbMmLWPnXbaifPOO49PfOITHHjggYwZM4ajjjqKhQsXdrn+5z//efbff39Gjx7NhAkT+NOf/gTAOeecw9ChQ9du95prrunLw5IkSRsgMrPdNfRo7Nix6ReFqK+sXr2aoUOHctNNN7Hrrruy0047AfDFL36RO++8kwsvvPA57R9++GFe/epXc+eddzJgwABOOeUUjjnmGE4//XTOOeccdtxxRz784Q+341AkSVI3ImJOZo7tPH+z+CY9qS/NmjWLffbZh7333vs581euXElEdLnO008/zR//+Ee23XZbVq1axZAhQ/qiVEmS1ACHWEidfOc732HChAlrpz/+8Y+z55578q1vfYtPf/rT67QfOnQoH/7wh9lrr70YPHgwO++8M0cdddTa5eeffz4HHngg73znO1m2bFmfHIMkSdpwBmSp8NRTTzFjxgxOPvnktfPOPfdcFixYwGmnncb555+/zjrLli1j+vTp3H///SxcuJCVK1dy6aWXAnDWWWdx7733MnfuXAYPHsyHPvShPjsWSZK0YQzIUuHaa6/lkEMOYdCgQessO/XUU7nyyivXmf/Tn/6Ul7zkJXR0dLDtttvypje9iRtuuAGAQYMGsc022/CCF7yAd73rXdx8882NH4MkSXp+DMhS4bLLLnvO8Ir58+evfT1jxgxGjhy5zjp77bUXv/71r1m1ahWZyaxZsxg1ahQAixYtWtvu6quvZvTo0Q1WL0mSNgYv0pNqq1atYubMmXzlK19ZO2/KlCnMmzePF7zgBey9995r72CxcOFCzjzzTK655hoOO+wwTjrpJA455BD69evHwQcfzOTJkwH4yEc+wty5c4kIhg8f/pxtS5KkTZO3eZMkSdJWydu8CYCptz7S7hKkdUw5ePd2lyBJ0lqOQZYkSZIKBmRJkiSpYECWJEmSCgZkSZIkqWBAliRJkgoGZEmSJKlgQJYkSZIKBmRJkiSpYECWJEm9snz5ck466SRGjhzJqFGjuPHGG7nttts44ogjOOCAAzj22GNZsWJFl+t+4QtfYPTo0ey///6cd955a+c/9thjjB8/nhEjRjB+/HiWLVvWR0cjrcuALEmSeuUDH/gARx99NHfffTe33XYbo0aN4swzz2Tq1Kn87ne/48QTT+Szn/3sOuvdfvvtfPWrX+Xmm2/mtttu44c//CHz588HYOrUqYwbN4758+czbtw4pk6d2teHJa1lQJYkSS1bsWIF119/PZMmTQKgf//+7LLLLsybN48jjzwSgPHjx3PllVeus+5dd93F4Ycfzvbbb0+/fv14zWtew9VXXw3A9OnTmThxIgATJ07k+9//ft8ckNQFA7IkSWrZfffdR0dHB2eccQYHH3wwZ555JitXrmT06NHMmDEDgCuuuIIFCxass+7o0aO5/vrrefTRR1m1ahXXXHPN2naLFy9m8ODBAAwePJglS5b03UFJnRiQJUlSy55++mluueUWzjrrLG699VZ22GEHpk6dysUXX8wFF1zAK17xCp544gn69++/zrqjRo3i7LPPZvz48Rx99NEcdNBB9OvXrw1HIXXPgCxJklo2bNgwhg0bxmGHHQbASSedxC233MLIkSP5yU9+wpw5c5gwYQL77LNPl+tPmjSJW265heuvv56BAwcyYsQIAAYNGsSiRYsAWLRoEXvssUffHJDUhcYCckS8PCLmFo8VEfHBiBgYETMj
Yn79vGtTNUiSpI3rxS9+MXvuuSfz5s0DYNasWey3335rh0Q888wzfOYzn+Hd7353l+uvaffggw9y1VVXMWHCBACOO+44pk2bBsC0adM4/vjjmz4Uab0aC8iZOS8zx2TmGOAVwCrgamAKMCszRwCz6mlJkrSZ+NKXvsRpp53GgQceyNy5c/nYxz7GZZddxr777svIkSMZMmQIZ5xxBgALFy7kmGOOWbvum9/8Zvbbbz+OPfZYLrjgAnbdteonmzJlCjNnzmTEiBHMnDmTKVOMB2qfyMzmdxJxFPDJzHxVRMwDXpuZiyJiMHBdZr68u/XHjh2bs2fPbrzOrcHUWx9pdwnSOqYcvHu7S5AkbYUiYk5mju08v69Gxr8VuKx+PSgzFwHUIbnLQUYRMRmYDLDXXnv1SZGSJK1hh4I2VXYqNK/xi/Qioj9wHHBFb9bLzIsyc2xmju3o6GimOEmSJKmTvriLxRuAWzJzcT29uB5aQf3sjQ4lSZK0yeiLgDyBZ4dXAMwAJtavJwLT+6AGSZIkqSWNBuSI2B4YD1xVzJ4KjI+I+fUyv2xdkiRJm4xGL9LLzFXAbp3mPQqMa3K/kiRJ0obym/QkSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkQqMBOSJ2iYjvRcTdEXFXRBwREQMjYmZEzK+fd22yBkmSJKk3mu5B/gLw48wcCRwE3AVMAWZl5ghgVj0tSZIkbRIaC8gRsRNwJPB1gMx8KjOXA8cD0+pm04ATmqpBkiRJ6q0me5BfCiwFvhERt0bE1yJiB2BQZi4CqJ/3aLAGSZIkqVeaDMj9gEOA/8jMg4GV9GI4RURMjojZETF76dKlTdUoSZIkPUeTAfkh4KHMvKme/h5VYF4cEYMB6uclXa2cmRdl5tjMHNvR0dFgmZIkSdKzGgvImfkHYEFEvLyeNQ64E5gBTKznTQSmN1WDJEmS1Fv9Gt7++4BvRUR/4D7gDKpQfnlETAIeBE5uuAZJkiSpZY0G5MycC4ztYtG4JvcrSZIkbSi/SU+SJEkqGJAlSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkQr8mNx4RDwBPAKuBpzNzbEQMBL4LDAceAE7JzGVN1iFJkiS1qi96kP8qM8dk5th6egowKzNHALPqaUmSJGmT0I4hFscD0+rX04AT2lCDJEmS1KWmA3ICP4mIORExuZ43KDMXAdTPe3S1YkRMjojZETF76dKlDZcpSZIkVRodgwy8KjMXRsQewMyIuLvVFTPzIuAigLFjx2ZTBUqSJEmlRnuQM3Nh/bwEuBo4FFgcEYMB6uclTdYgSZIk9UZjATkidoiIF615DRwF3A7MACbWzSYC05uqQZIkSeqtJodYDAKujog1+/l2Zv44In4DXB4Rk4AHgZMbrEGSJEnqlcYCcmbeBxzUxfxHgXFN7VeS
JEl6PvwmPUmSJKlgQJYkSZIKBmRJkiSpYECWJEmSCgZkSZIkqWBAliRJkgoGZEmSJKlgQJYkSZIKBmRJkiSpYECWJEmSCgZkSZIkqWBAliRJkgoGZEmSJKlgQJYkSZIKBmRJkiSpYECWJEmSCgZkSZIkqWBAliRJkgoGZEmSJKlgQJYkSZIKBmRJkiSpYECWJEmSCgZkSZIkqWBAliRJkgotBeSIGBARL2+6GEmSJKndegzIEXEsMBf4cT09JiJmNFyXJEmS1Bat9CCfAxwKLAfIzLnA8KYKkiRJktqplYD8dGY+3nglkiRJ0iagXwttbo+IU4FtImIE8H7ghmbLkiRJktqjlR7k9wH7A38GLgNWAB9ssCZJkiSpbXrsQc7MVcDH64ckSZK0ResxIEfED4DsNPtxYDbwlcz8UxOFSZIkSe3QyhCL+4Anga/WjxXAYmDfelqSJEnaYrRykd7BmXlkMf2DiLg+M4+MiDuaKkySJElqh1Z6kDsiYq81E/XrjnryqUaqkiRJktqklR7kDwG/jIh7gQBeArwnInYApjVZnCRJktTXWrmLxTX1/Y9HUgXku4HVmfm/wHnNlidJkiT1rVaGWJCZfwZ+CwwEzgcearIoSZIkqV16DMgRcVhEfAH4PTAD+AVVb3JLImKbiLg1In5YTw+MiJkRMb9+3nVDi5ckSZI2tvUG5Ig4NyLmA/8E/A44GFiamdMyc1kv9vEB4K5iegowKzNHALPqaUmSJGmT0F0P8mSq+x3/B3BpZj7Kul8Y0q2IGAb8DfC1YvbxPHtx3zTghN5sU5IkSWpSdwH5xcC5wHHAPRHxTWBARLRy54s1zgM+AjxTzBuUmYsA6uc9uloxIiZHxOyImL106dJe7FKSJEnacOsNyJm5OjOvzcx3AC8DpgM3AA9HxLd72nBEvBFYkplzNqSwzLwoM8dm5tiOjo6eV5AkSZI2gpZ6gzPzT8D3gO9FxE7AiS2s9irguIg4BnghsFNEXAosjojBmbkoIgYDSzawdkmSJGmja+k2b6XMXJGZPX5BSGZ+NDOHZeZw4K3AzzLzbVR3wphYN5tI1TMtSZIkbRJ6HZA3gqnA+PoOGePraUmSJGmT0JsL7jZYZl4HXFe/fhQY1xf7lSRJknqrpYAcEX8BDC/bZ+Z/NlSTJEmS1DY9BuT69m77AHOB1fXsBAzIkiRJ2uK00oM8FtgvM3v1JSGSJEnS5qiVi/Rup/rSEEmSJGmL10oP8u7AnRFxM/DnNTMz87jGqpIkSZLapJWAfE7TRUiSJEmbih4Dcmb+vC8KkSRJkjYFPY5BjojDI+I3EfFkRDwVEasjYkVfFCdJkiT1tVYu0jsfmADMBwYAZ9bzJEmSpC1OS18Ukpn3RMQ2mbka+EZE3NBwXZIkSVJbtBKQV0VEf2BuRPwrsAjYodmyJEmSpPZoZYjF2+t27wVWAnsCb26yKEmSJKldWrmLxe8jYgAwODM/1Qc1SZIkSW3Tyl0sjgXmAj+up8dExIyG65IkSZLaopUhFucAhwLLATJzLjC8qYIkSZKkdmolID+dmY83XokkSZK0CWjlLha3R8SpwDYRMQJ4P+Bt3iRJkrRFaqUH+X3A/sCfgcuAFcAHG6xJkiRJaptW7mKxCvh4/ZAkSZK2aOsNyD3dqSIzj9v45UiSJEnt1V0P8hHAAqphFTcB0ScVSZIkSW3UXUB+MTAemACcCvwIuCwz7+iLwiRJkqR2WO9Fepm5OjN/nJkTgcOBe4DrIuJ9fVadJEmS1Me6vUgvIrYD/oaqF3k48EXgqubLkiRJktqju4v0pgGjgWuBT2Xm7X1WlSRJktQm3fUgvx1YCewLvD9i7TV6AWRm7tRwbZIkSVKfW29AzsxWvkREkiRJ2qIYgiVJkqSCAVmSJEkqGJAlSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkggFZkiRJKhiQJUmSpEJjATkiXhgRN0fEbRFx
R0R8qp4/MCJmRsT8+nnXpmqQJEmSeqvJHuQ/A6/LzIOAMcDREXE4MAWYlZkjgFn1tCRJkrRJaCwgZ+XJenLb+pHA8cC0ev404ISmapAkSZJ6q9ExyBGxTUTMBZYAMzPzJmBQZi4CqJ/3WM+6kyNidkTMXrp0aZNlSpIkSWs1GpAzc3VmjgGGAYdGxOherHtRZo7NzLEdHR2N1ShJkiSV+uQuFpm5HLgOOBpYHBGDAernJX1RgyRJktSKJu9i0RERu9SvBwCvB+4GZgAT62YTgelN1SBJkiT1Vr8Gtz0YmBYR21AF8csz84cRcSNweURMAh4ETm6wBkmSJKlXGgvImflb4OAu5j8KjGtqv5IkSdLz4TfpSZIkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklRoLCBHxJ4R8d8RcVdE3BERH6jnD4yImRExv37etakaJEmSpN5qsgf5aeBDmTkKOBx4T0TsB0wBZmXmCGBWPS1JkiRtEhoLyJm5KDNvqV8/AdwFDAWOB6bVzaYBJzRVgyRJktRbfTIGOSKGAwcDNwGDMnMRVCEa2GM960yOiNkRMXvp0qV9UaYkSZLUfECOiB2BK4EPZuaKVtfLzIsyc2xmju3o6GiuQEmSJKnQaECOiG2pwvG3MvOqevbiiBhcLx8MLGmyBkmSJKk3mryLRQBfB+7KzM8Vi2YAE+vXE4HpTdUgSZIk9Va/Brf9KuDtwO8iYm4972PAVODyiJgEPAic3GANkiRJUq80FpAz85dArGfxuKb2K0mSJD0ffpOeJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSYXGAnJEXBwRSyLi9mLewIiYGRHz6+ddm9q/JEmStCGa7EG+BDi607wpwKzMHAHMqqclSZKkTUZjATkzrwce6zT7eGBa/XoacEJT+5ckSZI2RF+PQR6UmYsA6uc91tcwIiZHxOyImL106dI+K1CSJElbt032Ir3MvCgzx2bm2I6OjnaXI0mSpK1EXwfkxRExGKB+XtLH+5ckSZK61dcBeQYwsX49EZjex/uXJEmSutXkbd4uA24EXh4RD0XEJGAqMD4i5gPj62lJkiRpk9GvqQ1n5oT1LBrX1D4lSZKk52uTvUhPkiRJagcDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBLkiRJBQOyJEmSVDAgS5IkSQUDsiRJklQwIEuSJEkFA7IkSZJUMCBL
kiRJBQOyJEmSVDAgS5IkSQUDsiRJklRoS0COiKMjYl5E3BMRU9pRgyRJktSVPg/IEbENcAHwBmA/YEJE7NfXdUiSJEldaUcP8qHAPZl5X2Y+BXwHOL4NdUiSJEnr6NeGfQ4FFhTTDwGHdW4UEZOByfXkkxExrw9qk3pjd+CRdhexJfhouwuQ1DR/X25E/s7cqPbuamY7AnJ0MS/XmZF5EXBR8+VIGyYiZmfm2HbXIUmbOn9fanPTjiEWDwF7FtPDgIVtqEOSJElaRzsC8m+AERHxkojoD7wVmNGGOiRJkqR19PkQi8x8OiLeC/wXsA1wcWbe0dd1SBuBQ4AkqTX+vtRmJTLXGf4rSZIkbbX8Jj1JkiSpYECWJEmSCgZkbdUi4slO06dHxPn163Mi4uGImFs/pvawrc7t50bELhHx2oh4vJj30yaPSZKa9Hx/b9btl3b6XblfRAyPiD92mt+/r45LKrXjPsjS5uTzmflvz6d9RAD8IjPfuFErk6RN03p/b0bEmtzx3cx8b6dlw4F7M3NMs+VJPTMgS70UEQ8A3wX+qp51ambe076KJGnTFRGXAI8BBwO3AL9ra0FSCwzI2toNiIi5xfRAnntf7r+PiLfVr8/OzP+qX6/IzEMj4h3AecAbu2i/LDPXhOi/LPZzRWaeuzEPQpL6UK9+b9bP+wKvz8zVEXE68JaIeHWxzhH18z7Ftn+Vme/ZqJVLLTIga2v3x/LjvPoXd/l1qOv7qPCy4vnzLbR3iIWkLUWvfm9GxASqjoHVRZuuhliAQyy0ifAiPWnD5HpeS5LWtbLdBUi9YUCWNsxbiucb21mIJEnauBxiIW2Y7SLiJqo/MicU88uxdwAn9GlVkrR56DwG+e+Ahe0qRurMr5qWeqm+i8XYzHyk3bVIkqSNzyEWkiRJUsEeZEmSJKlgD7IkSZJUMCBLkiRJBQOyJEmSVPA2b5K0GYqI1cDvgG2Bp4FpwHmZ+UxbC5OkLYABWZI2T2u/7jci9gC+DewMfLKdRUnSlsAhFpK0mcvMJcBk4L1R+T8RcTFARBwQEbdHxPbtrVKSNh8GZEnaAmTmfVS/0/cAzgNeFhEnAt8A/jYzV7WxPEnarDjEQpK2HAGQmc9ExOnAb4GvZOav2lqVJG1m7EGWpC1ARLwUWA0sqWeNAJ4EhrStKEnaTBmQJWkzFxEdwIXA+ZmZEbEz8AXgSGC3iDiprQVK0mbGr5qWpM1QF7d5+ybwuXp4xcXA3Mz8YkTsCfw38Bf1xXySpB4YkCVJkqSCQywkSZKkggFZkiRJKhiQJUmSpIIBWZIkSSoYkCVJkqSCAVmSJEkqGJAlSZKkwv8HA1+Uxf1LP3AAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 720x432 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Tag every row of hf_ce_lab with a diagnosis group: rows whose icd_code\n",
    "# starts with 'I502' or '4282' become 'HFrEF', all remaining rows 'HFpEF'.\n",
    "is_hfref = hf_ce_lab['icd_code'].str.startswith(('I502', '4282'))\n",
    "hf_ce_lab['dx'] = np.where(is_hfref, 'HFrEF', 'HFpEF')\n",
    "\n",
    "# Average age within each diagnosis group\n",
    "mean_values = hf_ce_lab.groupby('dx')['age'].mean()\n",
    "\n",
    "# Draw one bar per diagnosis group on an explicitly created Axes\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "mean_values.plot(kind='bar', color='skyblue', ax=ax)\n",
    "ax.set_xlabel('Dx')\n",
    "ax.set_ylabel('Mean Age')\n",
    "ax.set_title('Mean Age Stratified by ICD Code Prefixes')\n",
    "plt.xticks(rotation=0)\n",
    "\n",
    "# Write each group's mean (two decimals) centred just above its bar\n",
    "for patch, mean_age in zip(ax.patches, mean_values.values):\n",
    "    x_mid = patch.get_x() + patch.get_width() / 2\n",
    "    ax.annotate(f'{mean_age:.2f}', (x_mid, patch.get_height()),\n",
    "                ha='center', va='bottom')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 204,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Figure size 720x432 with 0 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAu0klEQVR4nO3de5xVZdn/8c9XQEZQTkKmAkKIJKiZIigHGRVT00R+iWfEQ1g9pdmTmWRPWsmTPOahtCxSAw+gSB4QExUQkDwgCCqgpCkJQoggAuYB8Pr9sdaMe4YZ2AMzs9cw3/frtV9773vde61rrbX3vva6173vpYjAzMwsa3YqdABmZmYVcYIyM7NMcoIyM7NMcoIyM7NMcoIyM7NMcoIyM7NMcoKyWiepg6SQ1LAWljVN0rdqejmFJumPkv4n5/l3Ja2QtF7S7un9l7Zx3iFp30qm1Zfte7Wku6t5nrX2OairnKDqEEmLJX0qqXW58nnpG71DLcdTLGlpDS9jmKQZFZS3TrfFATW5/G0h6UJJr0lalyaJRyXtlk4bJema7Zz/eZJm5pZFxHci4lfp9EbADcDXImLXiFiV3r+5PcutSTlf1uvT2wpJEyUdW+jYtqY2Pgf1lRNU3fMWcGbJE0kHArsULpwadxfQS1LHcuVnAK9ExPwCxFQpSf2A/wXOjIjdgP2BcVV4fXX8mt4DKAIWVMO8aluLiNgV+ArwJPCgpPMKG5IVihNU3XMXcG7O8yHAnbkVJDWW9BtJb6e/RP8oaZd0Wsv0l+lKSe+nj9vmvHaapF9J+nt6BPBE+SO2fEg6UdJcSWslLZF0dQXVLpC0TNJyST+qaD4RsRSYCgwuN+lcYPTW1qdcTGWaaco3sUhqLun2NJ53JF0jqUE6bV9J0yV9IOk9SfdVsuqHAc9GxNw0/tURMToi1km6CDgbuDw9SngknfdiST+R9DLwoaSGkq6Q9M90HyyUNDCtuz/wR+CIdB5r0vJRabz7AYvSWNZImppOL22m29L7I53+43QbLJN0QSXrmauTpFnptnlYUqt0Po9KurjcPnhZ0ilbm2FE/DsifgtcDYyQtJOkTpJWSzokndde6b4ormgelW3DdNp5kmam2+F9SW9JOiFnesd0f6+T9CRQ5c9AOp9q+RzUWxHhWx25AYuB/iRfQPsDDYAlwD5AAB3SejcBE4BWwG7AI8Cv02m7A98EmqTT7gceylnGNOCfwH4kR2bTgGsriacYWLqFaQeS/Ag6CFgBnJJO65DGOxZomtZbCfSvZF5nA6/nPO8CfAq0yXN9vpU+vhq4O2daSRwN0+cPAX9KY/oCMAv4djptLHBluj5FQJ9KYu0LfAT8AugNNC43fRRwTQX7dR7QDtglLRsE7JUu73TgQ2DPdNp5wMzK5lt+vdKyAPbN4/1xfLqvDki3w5jc11awvtOAd3Lq/7VkGwOnAc/n1P0KsArYuYL5bBZzWv6ltHz/9PlQ4NV0fz8O/GYLn5etbcMN6fwaAN8FlgFKpz9L0kzaGDgSWJf73inE56A+3goegG9V2FmfJ6ifAb9Ov0yeBBqmb/QOgNIPYqec1x0BvFXJPA8G3s95Pg34Wc7z/wImVfLaSj+YFdS9CbgxfVzywfxyzvT/A26v5LVNgLVAr/T5cODhKqzPVhMUSbPYJ6QJIp1+JvBU+vhOYCTQNo91PYHkS38NsD79omuQThtFxQnqgq3Mcx4wIH18HtuYoLb2/gDuIOcHCckPla0lqNz6XUl+PDQg+XJfDXROp/0G+EMl89ks5rS8KC3vnVM2AXgFeJlyPwCquA3fKPceC+CLQHtgI9A0Z/oYtiFBVefnoD7e3MRXN90FnEXyIbuz3LQ2JB+2OZLWpE1Ak9JyJDWR9CdJ/5K0FpgBtChpykr9O+fxf4BdqxqgpJ6Snkqb3j4AvsPmzSRLch7/i+TX7mYi4j8kR0bnShLJEdXoKqxPPvYBGgHLc7bbn0iO
pAAuJ/lynyVpwZaaviLisYj4BskRygCS/bS1nm652wJJ5yrp/FISywFsYzNTOVt8f5Dsg/L7ZWvK128EtI6IT0jOv50jaSeShH9XFePdO71fnVP2Z5LtcXO6jArlsQ1L3+fpewyS9/peJD9yPiy3XlVWnZ+D+sgJqg6KiH+RdJb4OvBAucnvkTQxdYuIFumteSQnngF+RNJE1jMimpE0X0Dy5VudxpD80m0XEc1JzpuUX0a7nMftSZpYKjOapMnoWJJmqYlpeVXW50OSL+cSX8x5vITkCKp1znZrFhHdoPScyNCI2Av4NvAHVdL1ukREfBYRU0jOoZX0Nqzs8gGl5ZL2IfkS/j6we0S0AObnrNP2XIJga++P5Wy+X7amfP0N6XIg2W9nA8cA/4mIZ6sY70DgXdLzapJ2JTkKuR24uuR8V3l5bMMtWQ60lNQ0pyyf7VCR6v4c1CtOUHXXhcDR5X7lERGfkXwwb5T0BQBJe0s6Lq2yG8kX1Jr0w33V9gYiqajcTelyVkfEx5J6kBzxlfc/6RFQN+B8oLKOBwBPkzSZjQTujYhPt2F95gFHSmovqTkwrGRCRCwHngCul9Qs56R8v3QdB+nzzhfvkySJTRVsiwGSzlDSeUPpuvcDnkurrCA5r7IlTdP5r0zneT6fJ7iSebSVtPNW5rOZPN4f44DzJHWV1IT83h/n5NT/JTA+Ijaly3sW+Ay4niocPUnaQ9L30+UPS+MG+C0wJyK+BTxK8oVfka1tw0qlPwBnA7+QtLOkPsA38oi5Nj4H9YoTVB0VEf+MiNmVTP4J8AbwXNrsNZnkKAOSX5+7kPzCfY6keWd77E2SIHJvnUjOXf1S0jrg51Tc1Xp6GucUkpPdT1S2kEga6O8kaYrLbda8iTzXJyKeJPnwvwzM4fOjsBLnAjsDC0mS0Hhgz3TaYcDzktaT/CL+QUS8VcFi3ic58f46yXmzu4HrIuKedPrtQNe02emhSuJcSPKF/ixJMjoQ+HtOlakkXcj/Lem9zeewVZW+PyLiMZJtOjWtMzWP+d1Fcg7s3yTnjC4pN/3OdB3y+aPrGkkfkpxj+jowKCLugCT5k5x3/U5a97+BQySdXX4meWzDrTkL6EnStHgVmzell1crn4P6pqTHiplZjZB0LnBRRPQpdCxWt/gIysxqTNrs918kTbNmVeIEZWY1Ij2vtZKkiW1MgcOxOshNfGZmlkk+gjIzs0yq08O8t27dOjp06FDoMMzMbDvMmTPnvYhoU768TieoDh06MHt2ZT2tzcysLpBU4UgdbuIzM7NMcoIyM8uYZcuWccghh1BUVMTGjRsBaN68OcXFxRQXF7N69WrWrVvHMcccw5FHHslJJ53EunXrysxj7ty5HHjggeSeBpk/fz69evWib9++nH/++WS9k5wTlJlZxrRq1YopU6Zw+OGHl5YdeOCBTJs2jWnTptGqVSsaNWrE3XffzYwZMxgwYACjRo0qM499992X5557jrZtP788WpcuXXjmmWd4+umnATJ/iqROn4OqyIYNG1i6dCkff/xxoUMpuKKiItq2bUujRo0KHYqZVUFRURFFRUVlyl599VX69u1L7969+fWvf01RURF77pmMxNWwYUMaNCg7gP9uu+222XxzvwsaN25Mu3btNquTJTtcglq6dCm77bYbHTp0IBmrsX6KCFatWsXSpUvp2LH81dLNrK55/fXXadmyJd/5znd45JFHOPnkkwFYv349I0eO5LHHHstrPhMmTOCnP/0p++23H7vvvntNhrzddrgmvo8//pjdd9+9XicnAEnsvvvuPpI020G0atUKSZxyyinMnz8fSH6IXnDBBQwfPpwWLVrkNZ+TTz6Z+fPns/feezNxYvnxkrNlh0tQQL1PTiW8Hcx2DB9++CGbNiVXd/n73/9Op06dAPj5z39O7969Ofroo/OazyeffH59x2bNmrHLLrtUf7DVaIdMUGZmddmGDRvo378/L730Escddxzz58/nsMMO
o2/fvixZsoRTTz2VZcuWMWLECB588EGKi4u59dZbAbj00kvZtGkTS5YsoX///syfP5/+/fuzePFiJk2aRL9+/ejXrx8rVqzga1/7WoHXdCsKfc357bkdeuihUd7ChQs3K6tNV111VVx33XUFjSFXobeHWV31zjvvxFe/+tVo3LhxbNiwobR8/Pjx0bZt29LnL774YvTv3z+Ki4tj4sSJZebx1FNPRfv27aNfv34xePDgMtOuv/766N27d82uRB0BzI4KvuN3uE4SZmbVoaSr98CBA8uUjx8/vkzvt2uuuYaHH36YJk2aVDifwYMHc80115Qp++STT3jppZc2r3x18+0PfFtd/UHhll0JN/FVg+HDh9OlSxf69+/PokWLADjssMOYNm0aAMOGDePKK68sYIRmVlVFRUW0bNmyTNmjjz7Ksccey047JV+db775Jh9//DGnnnoqp5xyCitWrNhsPmPHjqVv376MHTu2tOy2225jyJAhNbsCOwAnqO00Z84c7r33XubOncsDDzzACy+8AMCoUaP47ne/y5NPPsmkSZO46qqrChypmW2v0aNHc84555Q+X7FiBYsWLWL8+PF8+9vfZvjw4WXqd+/enddee41Jkybxu9/9jpUrV7JhwwamT5+ed8eG+sxNfNvp6aefZuDAgaWH9yX/TejWrRuDBw/mG9/4Bs8++yw777xzIcM0s+00depUevXqVeaz3Lx5cw477DCaNGnC0UcfzQ033FDmNbvuuiuQ/EH2yCOP5PXXX+e1117jrLPOqtXY6yofQVWDyrpzv/LKK7Ro0aLCw34zq1vmz5/PhAkTOP7441mwYAE/+9nP6Ny5M++++y6bNm1i3rx5m/0pfu3atQBs2rSJF154gQ4dOrBo0SJuvfXW0vncfPPNhVidOsEJajsdeeSRPPjgg3z00UesW7eORx55BIAHHniAVatWMWPGDC655BLWrFlT2EDNrErKd/Xu2bMnU6dOZdKkSXTr1o1rrrmGRo0aMXToUIqLi7n88ssZNmwY8HlX73HjxtGjRw969+7NgAED2GuvvRgxYgSPP/546XwuvvjiAq9pdtXpS7537949yg92+Oqrr7L//vvXahzDhw/nzjvvZJ999qFt27Z07dqVkSNHMmXKFNq1a8fvfvc75syZw+jRo2s1LijM9jCzbVRPe/FJmhMR3Tcrd4LasXl7mNUhTlBluJOEWRUsW7aMk046iYULF7J+/XoaNkw+Qn/961+59NJLWbJkCQD33HMPv//972nVqhVjxoyhWbNmpfOYO3cu5557LuvWrWPx4sUA/Oc//2HQoEF8+OGHNG/enHHjxtG4ceNaXz+DDlc8WrBlLy7aep36xOegzKqgouv0QNk/b27YsIE//vGPzJgxg8GDB/OnP/2pTN2KrtMzadIkevbsybRp0+jRoweTJk3abNnlL2L33nvv0atXL/r168fJJ5/MRx99BMC1115Lnz59+OY3v8mHH3642XxGjBhB//79KS4u5rPPPqvw4nhmWeAEZVYF+fx58x//+AcHHnggDRs2pH///jz33HNl6u+22240bdq0TFmnTp1KB/Jcs2ZNhZdBKJ8cW7ZsycyZM5k+fTqHHnooEydOZPny5Tz99NPMnDmTs846i9tuu63MPF544QXWr1/P5MmTmTZtGjvttFOlSdes0JygzLZT+T9vrlmzprRJr3nz5rz//vtbnUfnzp15/vnn6datG7Nnz6ZXr16b1SmfHBs0aFCaFDdt2kTnzp15++236dq1KwAHH3wwzz77bJl5PPLII6xatYqjjjqKX/7ylxXO1ywrnKDMtkNFf95s0aJF6f9f1q5dm9d1ekaPHs1xxx3HggULOPHEE7n77rvzWv6sWbPo3r07U6dOpWPHjnzpS19i1qxZbNy4kalTp26WHFesWEGLFi146qmnWLhwIS+++GL+K2tWy2osQUm6Q9K7kuZXMO0ySSGpdU7ZMElvSFok6biaisusOlX05839
9tuP+fPns2nTJiZPnpxX01lE0KpVKwBat27NBx/k16OqR48ezJ49m4EDB3LHHXfQpk0bzjnnHPr378/rr7/OHnvsUaZ+8+bN6devHwBHHXUUr776ahXX2Kz21GQvvlHALcCduYWS2gHHAm/nlHUFzgC6AXsBkyXtFxGbtjeI6u6Rs/jaE7daZ9ddd2X9+vWlz0eNGsXs2bO55ZZbuPrqq/nzn/9MmzZtADj++OO59tprK51X+foA06ZNY968eQwYMKD0n+utW7dm8uTJ27palqcNGzZwwgknlP5583//93+55JJLAOjTp0/pqNVDhw6lb9++tGzZkjFjxgDJnzevv/56li1bxvnnn196nZ7bbruNs846i9NPP5277rqLRo0acd999201lk8//bT0yK1Zs2alF7QbOnQoQ4cOZdSoURx66KFlXtOrVy9efvlljjvuOObNm8fgwYOrbduYVbcaS1ARMUNShwom3QhcDjycUzYAuDciPgHekvQG0AN4toLX13k//OEPueyyy7a7ft++fTN/yeYdTaNGjSr9ITBz5szSx4MHD97sy/+mm24CoF27dhXO4/HHH9/isitKjpdffnlpR4e77roLgEGDBrF69WoOOuggrr/+euDz5HjSSScxdOhQ+vXrx5e//GV69epV4Xx79uyZ9zYxqym1+j8oSScD70TES+XGr9sbyO3qtDQtqzc6dOjA6aefzlNPPQXAmDFj2HfffQsclZUo1H9jco/YK0qO06dP3+w1999//2ZlJckR4C9/+UuZaVtKumaFVGsJSlIT4EqgomsMVzTaaoVDXEi6CLgIoH379tUWX3X66KOPOPjgg0ufr169unSUc4Abb7yx9CT4iBEjOO645JRbs2bNmDVrFnfeeSeXXnpp6dFRbv2WLVuWJrGnn366dDmDBg3yNad2RPV0ZAEzqN0jqE5AR6Dk6Kkt8KKkHiRHTO1y6rYFllU0k4gYCYyEZKijmgx4W+2yyy7Mmzev9HnJOagSlTXZnXnmmaX3P/zhD7da3018ZrYjq7Vu5hHxSkR8ISI6REQHkqR0SET8G5gAnCGpsaSOQGdgVm3FlhW5zZ6VXcLDzKy+qMlu5mNJOjl0kbRU0oWV1Y2IBcA4YCEwCfhedfTgq2tKem7dd999HHHEEQWOxsyssGqyF9+ZW5neodzz4cDwimtvu3y6hWfFJ598Qs+ePfnss88YO3ZsaXnuOSiAhx56qADRmZnVLl9uIyM6dOjA7Nmzad269dYrV0Fd3R5ZU7BefEUFvDR4Pe0kUdjRzOvn/q7schse6sjMzDLJ14PKiJLrApmZWcJHUGZmlklOUGZmlklOUGZmlklOUGZmlkk7fieJ6h7LLI+umNt7uY1Ro0bx4x//mL33/ny83DFjxtCkSRP2339/unTpUlo+a9asMhfLMzPbUez4CSqDtnS5jY0bNwJw+umnc8stt5SZtnjxYjp16lRmnD8zsx2VE1QGnHfeebRq1Yq5c+dyyCGHcOCBBxY6JDOzgnOCqgFVvdwGwD/+8Q8mT55MgwYNGDVqFPfdd1+ZC+A9+2xy7cZ//vOfpfPu3bs3v//972t4bczMCsMJqgZU9XIbY8eOZdCgQTRo0KC0rKImPsBNfGZWb7gXX0Y0bdq00CGYmWWKE5SZmWXSjt/EV0dHZC5/DuoPf/gDe+21VwEjMjOrXb7cxg7O26N6+HIb9Ycvt1H7fLkNMzOrU5ygzMwsk3bIBFWXmy2rk7eDmdVlNZagJN0h6V1J83PKrpP0mqSXJT0oqUXOtGGS3pC0SNJx27rcoqIiVq1aVe+/nCOCVatWUVRUVOhQzMy2SU324hsF3ALcmVP2JDAsIjZKGgEMA34iqStwBtAN2AuYLGm/iNhU1YW2bduWpUuXsnLlyu1egbquqKiItm3bFjoMM7NtUmMJKiJmSOpQruyJnKfPAaemjwcA90bEJ8Bbkt4AegDPVnW5jRo1omPHjtsWtJmZZUYhz0FdADyW
Pt4bWJIzbWlathlJF0maLWm2j5LMzHZcBUlQkq4ENgL3lBRVUK3Ck0gRMTIiukdE95JrKpmZ2Y6n1keSkDQEOAk4Jj7vybAUaJdTrS2wrLZjMzOz7KjVIyhJxwM/AU6OiP/kTJoAnCGpsaSOQGdgVm3GZmZm2VJjR1CSxgLFQGtJS4GrSHrtNQaelATwXER8JyIWSBoHLCRp+vvetvTgMzOzHUdN9uI7s4Li27dQfzgwvKbiMTOzumWHHEnCzMzqPicoMzPLJCcoMzPLJCcoMzPLJCcoMzPLJCcoMzPLJCcoMzPLJCcoMzPLJCcoMzPLJCcoMzPLJCcoMzPLJCcoMzPLJCcoMzPLJCcoMzPLJCcoMzPLJCcoMzPLJCcoMzPLJCcoMzPLpBpLUJLukPSupPk5Za0kPSnp9fS+Zc60YZLekLRI0nE1FZeZmdUNNXkENQo4vlzZFcCUiOgMTEmfI6krcAbQLX3NHyQ1qMHYzMws42osQUXEDGB1ueIBwOj08WjglJzyeyPik4h4C3gD6FFTsZmZWfbV9jmoPSJiOUB6/4W0fG9gSU69pWnZZiRdJGm2pNkrV66s0WDNzKxw8kpQkvpIOj993EZSx2qOQxWURUUVI2JkRHSPiO5t2rSp5jDMzCwrtpqgJF0F/AQYlhY1Au7exuWtkLRnOt89gXfT8qVAu5x6bYFl27gMMzPbAeRzBDUQOBn4ECAilgG7bePyJgBD0sdDgIdzys+Q1Dg9OusMzNrGZZiZ2Q6gYR51Po2IkBQAkprmM2NJY4FioLWkpcBVwLXAOEkXAm8DgwAiYoGkccBCYCPwvYjYVNWVMTOzHUc+CWqcpD8BLSQNBS4A/ry1F0XEmZVMOqaS+sOB4XnEY2Zm9cBWE1RE/EbSscBaoAvw84h4ssYjMzOzei2fIyjShOSkZGZmtWarCUrSOj7v8r0zSS++DyOiWU0GZmZm9Vs+TXxleuxJOgWP8mBmZjWsyiNJRMRDwNHVH4qZmdnn8mni+385T3cCulPJKA9mZmbVJZ9OEt/IebwRWEwyuKuZmVmNyecc1Pm1EYiZmVmuShOUpJvZQlNeRFxSIxGZmZmx5SOo2bUWhZmZWTmVJqiIGF3ZNDMzs5qWTy++NiSX2+gKFJWUR4S7mpuZWY3J539Q9wCvAh2BX5D04nuhBmMyMzPLK0HtHhG3AxsiYnpEXAAcXsNxmZlZPZfP/6A2pPfLJZ1IcqXbtjUXkpmZ2Za7mTeKiA3ANZKaAz8CbgaaAT+spfjMzKye2tIR1DuSHgbGAmsjYj5wVO2EZWZm9d2WzkHtT/JfqP8Blki6SVLP2gnLzMzqu0oTVESsiog/RcRRJJfXeAu4SdI/JfnS7GZmVqPyutxGRCwDbgduBdYB39qehUr6oaQFkuZLGiupSFIrSU9Kej29b7k9yzAzs7ptiwkqTRyDJD0A/BM4BhgG7LWtC5S0N3AJ0D0iDgAaAGcAVwBTIqIzMCV9bmZm9VSlCUrSGOBt4HRgDLBPRAyJiMciYtN2LrchsIukhkATkq7rA4CS4ZVGA6ds5zLMzKwO21IvvseBb0fEuupcYES8I+k3JMnvI+CJiHhC0h4RsTyts1zSFyp6vaSLgIsA2rdvX52hmZlZhmypk8To6k5OAOm5pQEkQyftBTSVdE6+r4+IkRHRPSK6t2nTprrDMzOzjMirk0Q16w+8FREr0z8CPwD0AlZI2hMgvX+3ALGZmVlGFCJBvQ0cLqmJJJF0vHgVmAAMSesMAR4uQGxmZpYR+YzFh6ReQIfc+hFx57YsMCKelzQeeBHYCMwFRgK7AuMkXUiSxAZty/zNzGzHkM/1oO4COgHzgJLeewFsU4ICiIirgKvKFX9CcjRlZmaW1xFUd6BrRERNB2NmZlYin3NQ84Ev1nQgZmZmufI5gmoNLJQ0i6QZDoCIOLnGojIzs3ovnwR1dU0HYWZmVt5WE1RETK+NQMzMzHJt9RyUpMMlvSBpvaRPJW2S
tLY2gjMzs/orn04StwBnAq8Du5BcauOWmgzKzMwsrz/qRsQbkhqko5j/RdIzNRyXmZnVc/kkqP9I2hmYJ+n/gOVA05oNy8zM6rt8mvgGp/W+D3wItAO+WZNBmZmZ5dOL71+SdgH2jIhf1EJMZmZmefXi+wbJOHyT0ucHS5pQw3GZmVk9l08T39VAD2ANQETMIxnZ3MzMrMbkk6A2RsQHNR6JmZlZjnx68c2XdBbQQFJn4BLA3czNzKxG5XMEdTHQjWSg2LHAWuDSGozJzMwsr158/wGuTG9mZma1otIEtbWeer7chpmZ1aQtHUEdASwhadZ7HlB1LVRSC+A24ACSy8dfACwC7iPpIbgYOC0i3q+uZZqZWd2ypXNQXwR+SpJEfgscC7wXEdOr4RIcvwUmRcSXga8ArwJXAFMiojMwJX1uZmb1VKUJKiI2RcSkiBgCHA68AUyTdPH2LFBSM+BI4PZ0OZ9GxBpgADA6rTYaOGV7lmNmZnXbFjtJSGoMnEhyuY0OwO+AB7ZzmV8CVpKMiv4VYA7wA2CPiFgOEBHLJX2hkpguAi4CaN++/XaGYmZmWVXpEZSk0ST/dzoE+EVEHBYRv4qId7ZzmQ3Ted4aEV8lGYA27+a8iBgZEd0jonubNm22MxQzM8uqLZ2DGgzsR3J084yktelt3XZeUXcpsDQink+fjydJWCsk7QmQ3r+7HcswM7M6bkvnoHaKiN3SW7Oc224R0WxbFxgR/waWSOqSFh0DLAQmAEPSsiHAw9u6DDMzq/vyuqJuDbgYuCe9EOKbwPkkyXKcpAuBt4FBBYrNzMwyoCAJKh0RvXsFk46p5VDMzCyj8hmLz8zMrNY5QZmZWSY5QZmZWSY5QZmZWSY5QZmZWSY5QZmZWSY5QZmZWSY5QZmZWSY5QZmZWSY5QZmZWSY5QZmZWSY5QVWzG264gT59+rBhwwaOOOIIdt11V954443N6n322WdcdtllHHPMMQwaVHZc3EsuuYRzzjmntkI2M8skJ6hq9Mknn/DSSy8B0LBhQx566CFOPfXUCuuOHz+e/fffnylTpnD//feXlq9YsYLFixdv0/JLkiPAddddR58+fTj77LPZsGFDmXrXXnstxcXFFBcX07RpU1avXs0HH3zASSedRHFxMTfddNM2Ld/MrDo5QVWj2267jSFDkktaSWKPPfaotO7EiRNZuHAhxcXF/PnPfy4tv/HGG7n44ourvOzc5Lhy5UqeeuopZs6cyUEHHcRDDz1Upu4VV1zBtGnTGD9+PIcddhitWrVi5MiRnH322UybNo0ZM2bw3nvvVTkGM7Pq5ARVTTZs2MD06dM5+uij86q/YsUKunTpwuTJk7nnnntYsWIFq1evZuXKlXTu3LnKy89NjrNmzaK4uBiA/v3789xzz1X4mgkTJnDyyScD8Oabb3LQQQcB0K1bN2bPnl3lGMzMqpMTVDW56667OOuss/Ku37x5c/r160fDhg054ogjeOONN/jtb3/L97///Sovu3xyXLNmDc2aNStdzvvvv1/h6x588EEGDhwIQJcuXZg+fTqbNm1ixowZlb7GzKy2OEFVk0WLFnHrrbdy/PHHs2DBAm6++eYt1u/Vqxcvv/wyAC+//DL77LMPb731FsOGDWPIkCFMnTqVcePG5bXs8smxRYsWrF27FoC1a9fSokWLzV6zfv163nvvPTp27AjA0KFDeeaZZzjhhBPYa6+9ttg8aWZWG5ygqsmIESN4/PHHmTRpEt26dePiiy/mtNNO44knnmDIkCE8/PDDAKXnly688ELGjh1L79696dmzJ23btuXOO+9k0qRJjB49mqOPPprTTjstr2WXT46zZ89m+vTpAEyePJnDDz98s9f87W9/44QTTih93rRpU+6++24ee+wxPvvsswpfY2ZWmxQRhY5hm3Xv3j18rqSsPn36MHPmTEaMGMEjjzxC+/btGTVqFDvvvDMXX3xx6ZHdmWeeyZVXXskBBxwAwJw5c7jsssuQ
xOWXX87xxx9fyNXInA5XPFqQ5S4uyr/ZuNpd/UHhll1AhdrXUH/3t6Q5EdF9s/JCJShJDYDZwDsRcZKkVsB9QAdgMXBaRGzxRIgTlNUWJ6j6wwmq9lWWoBoWIpjUD4BXgWbp8yuAKRFxraQr0uc/KVRwW1PQN/G1JxZs2WZmtaUgCUpSW+BEYDjw32nxAKA4fTwamEaGE1RBXd28gMuun7+qzaz2FaqTxE3A5cBnOWV7RMRygPT+CxW9UNJFkmZLmr1y5coaD9TMzAqj1hOUpJOAdyNizra8PiJGRkT3iOjepk2bao7OzMyyohBNfL2BkyV9HSgCmkm6G1ghac+IWC5pT+DdAsRmZmYZUetHUBExLCLaRkQH4AxgakScA0wAhqTVhgAP13ZsZmaWHVn6o+61wLGSXgeOTZ+bmVk9Vchu5kTENJLeekTEKuCYQsZjZmbZkaUjKDMzs1JOUGZmlklOUGZmlklOUGZmlklOUGZmlklOUGZmlklOUGZmlklOUGZmlklOUGZmlklOUGZmlklOUGZmlklOUGZmlklOUGZmlklOUGZmlklOUGZmlklOUGZmlklOUGZmlklOUGZmlkm1nqAktZP0lKRXJS2Q9IO0vJWkJyW9nt63rO3YzMwsOwpxBLUR+FFE7A8cDnxPUlfgCmBKRHQGpqTPzcysnqr1BBURyyPixfTxOuBVYG9gADA6rTYaOKW2YzMzs+wo6DkoSR2ArwLPA3tExHJIkhjwhUpec5Gk2ZJmr1y5stZiNTOz2lWwBCVpV+CvwKURsTbf10XEyIjoHhHd27RpU3MBmplZQRUkQUlqRJKc7omIB9LiFZL2TKfvCbxbiNjMzCwbCtGLT8DtwKsRcUPOpAnAkPTxEODh2o7NzMyyo2EBltkbGAy8ImleWvZT4FpgnKQLgbeBQQWIzczMMqLWE1REzARUyeRjajMWMzPLLo8kYWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmeQEZWZmmZS5BCXpeEmLJL0h6YpCx2NmZoWRqQQlqQHwe+AEoCtwpqSuhY3KzMwKIVMJCugBvBERb0bEp8C9wIACx2RmZgXQsNABlLM3sCTn+VKgZ24FSRcBF6VP10taVEuxZYagNfBeQRb+CxVksfWV93X9Uo/39z4VFWYtQVW0haLMk4iRwMjaCSebJM2OiO6FjsNqnvd1/eL9XVbWmviWAu1ynrcFlhUoFjMzK6CsJagXgM6SOkraGTgDmFDgmMzMrAAy1cQXERslfR94HGgA3BERCwocVhbV6ybOesb7un7x/s6hiNh6LTMzs1qWtSY+MzMzwAnKzMwyygmqHElflHSvpH9KWijpb5L2k1QsaWIV5zVNUpW7jEo6pTpH0JA0StKpW6lztaTLypUtltQ6fbxJ0jxJCyS9JOm/Je2UTqvytsmC+rivJV2Z7sd5Oft0nqRLKqh7sKSv57HMOrn/q4ukPSSNkfSmpDmSnpU0sL5vl+rgBJVDkoAHgWkR0SkiugI/Bfao5VBOIRnqKW+SarrDy0cRcXBEdAOOBb4OXFXDy6wx9XVfR8TwdD8ezOf79OCI+F0F1Q8m2c9WifR99BAwIyK+FBGHkvQ+blvQwHYQTlBlHQVsiIg/lhRExLyIeDp9uquk8ZJek3RP+uZE0jGS5kp6RdIdkhqXn7Gkr6W/rF6UdL+kXdPya9Nf7y9L+o2kXsDJwHXpL9tO6W1S+uvsaUlfTl87StINkp4CRqS/eJ9L5/WgpJY1sZEi4l2S0Ty+X7IN6iDv68/jLZL0l3Sd5ko6SsnfPH4JnJ7GdrqkHpKeSes8I6nLti5zB3I08Gm599G/IuLm3Eoq10Ihab6kDunjc9P9+JKku9KyfSRNScunSGqflg9KX/uSpBlp
WQNJ10l6Ia3/7Zpf7VoSEb6lN+AS4MZKphUDH5D8MtoJeBboAxSRDM+0X1rvTuDS9PE0oDvJ8CUzgKZp+U+AnwOtgEV83puyRXo/Cjg1Z9lTgM7p457A1Jx6E4EG6fOXgX7p418CN1U0v0rW72rgHWBezu1ToHU6fX0Fr3mf5IijGJhY6P3nfZ3fvs5Z1vr0/kfAX9LHXwbeTtf1POCWnPrNgIbp4/7AX3O2V53a/7X4PpqYPr4auCxn2nygA9AtfV+UfM5apfePAEPSxxcAD6WPXwH2Lvceugj4Wfq4MTAb6FjobVMdt0z9D6oOmBURSwEkzSN5g60D3oqIf6R1RgPfA27Ked3hJM04f09/iO9M8qW3FvgYuE3SoyRfQGWkv757AffnHKzk/mq/PyI2SWpO8oadnhPH/VVcvxsj4jc5y168lfp19egpHzv6vs7VB7gZICJek/QvYL8K6jUHRkvqTDIEWaPtWOYOSdLvSbbnp8CP83jJ0cD4iHgPICJWp+VHAP8vfXwX8H/p478DoySNAx5Iy74GHKTPzz02BzoDb23HqmSCE1RZC4AtdSb4JOfxJpLtl8+XtIAnI+LMzSZIPYBjSNqtv0/yhs21E7AmknMGFfkwj+VXO0lfItkG7wL7FyKG7eR9nRNanvV+BTwVEQPT5qlpNRRPXbIA+GbJk4j4npKORbPL1dtI2VMqRem9KDfeaCUinf93JPUETgTmSTo4ncfFEfH4Nq1BhvkcVFlTgcaShpYUSDpMUr8tvOY1oIOkfdPng4Hp5eo8B/QuqSOpiZLeYrsCzSPib8ClJCelIfmlvhtARKwF3pI0KH2tJH2lfBAR8QHwvqS+W4gDSb+WNHAL67NVktoAfyRp/qmr//T2vv7cDODs9DX7Ae1Jmp1KY0s1J2kGhqT5z5L3UZGk7+aUNamg3mLgEABJhwAd0/IpwGmSdk+ntUrLnyH5IQPJvpmZTu8UEc9HxM9JRj1vRzLyznclNUrr7CepafWsXmE5QeVIv2wHAscq6Xq8gKTtuNIBayPiY+B8kmaZV4DPSL68c+usJPlAj5X0MsmX2JdJPvwT07LpwA/Tl9wL/Dg9Gd2J5A16oaSXSH6xVXaNrCEkJ9xfJvkC/GUFdQ4E/r2FzVCZXdKT5QuAycATwC9yph8jaWnO7YhtWEat8b4u4w9Ag3Sd7gPOi4hPgKeAriWdJEiamX4t6e8kQ5HVe+n76BSgn6S3JM0iaXL9SbmqfwVapc3F3wX+kb5+ATAcmJ7u8xvS+pcA56f7dzDwg7T8OiWdWeaT/LB4CbgNWAi8mJb/iR2kdcxDHdUzkh6PiOMKHYfVPO9rq+ucoMzMLJPcxGdmZpnkBGVmZpnkBGVmZpnkBGVmZpnkBGVWRZLWV6HuZqPEV1KvZGTx+ZIekdRiu4LcfP6LJbWW1ELSf1XnvM1qihOUWTaUjCx+ALCaZAilmtACcIKyOsEJyqwaSPqGpOfTP9xOlpR72Y6vSJoq6fXckSu24Flg73S+lY1uXtGo1udJuiUnpomSisvN+1qgU3q0dp2kPSXNyDl664tZRuwQ/zY2y4CZwOEREZK+BVxOMko4wEEkg8g2BeZKejQiKhyxQlIDkvH6bk+LRgLfiYjXlYzB9geSMfx+DhwXEe9UsTnwCuCAkvH+JP0IeDwihqfLrmiYHrOCcIIyqx5tgfsk7UkygnnuSNIPR8RHwEdKrufUg+Qid7l20eejps8BntSWRzevaFTrbfECcEc6jttDETFvO+ZlVq3cxGdWPW4mGTz3QODbfD5aNWw+WnVFw7d8lB7V7EOS4L5HzujmObf9IRnVGvgZyWCh89LBRisbMbtSETEDOJJkENi7JJ271TU1qyVOUGbVI3ek7yHlpg1QctXa3UkuYvdCZTNJRyq/BLgM+IhKRjevZFTrxcDBknaS1I7kSK28MiOUS9oHeDci/kzSrHhIldbarAa5ic+s6ppIWprz/AaSkdDvl/QOyQjmHXOm
zwIeJbmMxa8qO/9UIiLmpiNbn0Eyuvmtkn5GcoHAe0lGsL5OyYUDRXLJhpfSl79FctXV+cCLFcx7laS/p6NeP5bW+7GkDcB6wEdQlhkeLNbMzDLJTXxmZpZJTlBmZpZJTlBmZpZJTlBmZpZJTlBmZpZJTlBmZpZJTlBmZpZJ/x9JLdaqEPv0/wAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Mean lab value for every (dx, lab) pair. Labs whose name contains 'pro'\n",
    "# are left out (the original note says this is meant to drop 'probnp').\n",
    "mean_values = (\n",
    "    hf_ce_lab[~hf_ce_lab['label_lab'].str.contains('pro')]\n",
    "    .groupby(['dx', 'label_lab'])['valuenum']\n",
    "    .mean()\n",
    "    .reset_index()\n",
    ")\n",
    "\n",
    "# Wide layout for a grouped bar chart: one row per lab, one column per dx\n",
    "mean_by_lab = mean_values.pivot(index='label_lab', columns='dx', values='valuenum')\n",
    "\n",
    "# DataFrame.plot opens its own figure unless it is handed an Axes, so a bare\n",
    "# plt.figure(figsize=...) is ignored and leaves an empty figure in the output.\n",
    "# Create the Axes explicitly and pass it in so the requested size is applied.\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "mean_by_lab.plot(kind='bar', rot=0, ax=ax)\n",
    "ax.set_xlabel('Lab Results')\n",
    "ax.set_ylabel('Mean Value')\n",
    "ax.set_title('Mean Lab Values Stratified by Dx and Lab')\n",
    "ax.legend(title='dx')\n",
    "\n",
    "# Label each bar with its own height instead of indexing mean_values by patch\n",
    "# position: the positional lookup only lines up when every (dx, lab) pair exists.\n",
    "for bar in ax.patches:\n",
    "    height = bar.get_height()\n",
    "    if np.isnan(height):\n",
    "        continue  # this (dx, lab) pair is absent from the pivot: nothing to label\n",
    "    ax.text(\n",
    "        bar.get_x() + bar.get_width() / 2, height + 0.1, f'{height:.2f}',\n",
    "        ha='center', va='bottom', fontsize=8, color='black'\n",
    "    )\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 203,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Figure size 720x432 with 0 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAtPklEQVR4nO3de5wU1Z3//9ebQW4qIBeNARNYg67ihSjiFUFwI0lW0fWGSRSjWTZRQd3EWy4GN4s/TdwYjRq/GrMDGi/EVWN01QiKkQ1Kxoi3GCMqUS5R5CJoEBj8/P6oM2PR9Aw9MD3TA+/n49GPrj5V59Sp7ur+1Dl1ukoRgZmZWaVp19oVMDMzK8YByszMKpIDlJmZVSQHKDMzq0gOUGZmVpEcoMzMrCI5QNkWQ9Lpkma2dj2ai6QbJX0v9/obkt6W9L6knun5Hzax7JD0mQbmzZD0tU2td1shaaKk25q5zH7pvW3fhDyfk3Rfc9ajpTS2HxUsN0HSFU0tf6sPUJLmSVojqVdB+pz05vdr4foMT+u9viB9ZvoB/nb6YXpf0oeS1uVev1SG+pxesI7XJX0jN7/uC/lgQb7bJE3MbdNHKf9KSa9I+mqRdfWRVCtp1yLz7pV0VXNvX6kknSnpz6n+b0t6UNL2aV61pP/czPI3CK4R8fWI+EGavw3wY+BzEbFdRCxJz69vznrLKbdv1O07b0t6QNI/tXbdNibts/NbaHWXA/U/3uk9+yC9Z+9KukNS99z8Gem7v0su7UhJ83Kv50lalXvf/1vSdi20PcXcBHxF0o5NybTVB6jkDeCUuheS9gY6t151+AA4rVhwjIjL0w/TdsDXgVl1ryNiYFNXVOKR3qzcOk8AfijpswXLHCTp0EbKWJjydwUuAm6WtGd+gYhYAEwHTi2oYw/gC8DkEura7CQNI/sROSUitgf2AKY2IX/JR9ON2AnoBDT7QUgL6J4++32BR4F7JZ3eulWqDJIOALpFxFMFs/ZN79k/ADsAEwvmfwB8j8YdncrYDzgA+O7m13jTRMSHwEPAaU3J5wCVuZX137ixwJT8ApI6SrpK0pvpiORGSZ3TvB3SkeFiScvSdN9c3hmSfiDp/9IR+G8LW2wFlgPVwPc3ZWPSEdiE1Np5V9KPJLVL805P9bha0lJgoqRukqak+v9V0nfrli8UEX8EXib7kc77IbDRVkRk7gOWAXsWWWQyBQEKGAO8FBEvSLpY0mvpffyTpOMaeA826GpRQdeVpDMkvZw+s0ckfbqBah9AFqSfTduwNCImR8RKSeOALwMXpqPV36Sy50m6SNLzwAeS2jdUd0l7ADcCB6cylqf0akn/KWk34JVUl+WSHkvz67tXGts/0/wLJC2StFDSGQ1sZ96ukmZLek/Sr9NBAspajuML3uvnJR27sQIj4m8RcQ3Zj+2VktpJ2lXSUkn7pbI+mfbZ4cXKaOzzT/v2zPQ+LJP0hqTP5+b3l/REyvso0Nh3sEGSvijpWUkrJL2l1FNQ4Iz0Xi+S9M1Givs88ERDMyNiBXA/G35XrgVOUQnda+nA7yFgr2LzJe2RvhvLJb0k6ZjcvGpJ16fPfaWkp1W8h+OAtN/lv2/HS5qTW2wG8MWN1TfPASrzFNA1fVBVwMlAYd/0lcBuwCDgM0Af4NI0rx3w38CngU8Bq4DrCvJ/CfgqsCPQAfjWRuo0CThe0u6bsD0AxwGDyY6eRgP5H6UDgddTXSYBPwW6kR2tDSML1ht0wUH9Ed9uQE3BrOuB3SQd2Vil0o/ScUB34IUii9wL9JJ0WC7tVD4+YHgNGJrqexlwm6SdG1tnA/U4Fvg28C9Ab+BJ4I4GFn8aOErSZZIOldSxbkZE3AT8EvhhamUenct3CtkXsntE1DZU94h4mfVbw93zK4+IvwB1rePuETGiSB0b3D8ljSLb3/4JGAA0+hklp5HtM58Easl+ECE7gPhK3UKS9k3r+t8SyqxzD9m+t3tEvEbW
ov6lpC5k36PqiJjRQN6Nff4HkgXzXmQHTbdIUpp3O/BMmvcDsgPRTfEB2fvTnezz/UaRAH0E2Xv9OeDiRr4Xe/PxwccGJO0AHEv2G5W3ALiZDVtWxcrYhawH4tki87YBfgP8luwzGU/2WeR/d04he693AOaS/WasJyL+ACwh28fqfIXs4L/Oy2St6NJFxFb9AOaRfWG/C/x/wCiyboj2QAD9AJHtlLvm8h0MvNFAmYOAZbnXM4Dv5l6fBTzcQN7hwPw0/UPgrjQ9Ezi9YNnTgZlFyghgVMH6pufyvJmbVwWsBvbMpf0bMCO3fC1Zq+79VPZPAaX5/VJa+7Sep1L6bcDE3DZ9lMpYCswBxjTymfwcuClNDwDWADs2sOwcYHTh+5GvV8Hn8LU0/RBwZm5eO+DvwKcbWM/nyb7Ide/Dj4GqNK8a+M8i+9UZG9n3itY9N7++3Aa2J8iCUaP7J/AL4IrcvN3q8jZQrxkFy++ZPoMqoGP6DAekeVcBNzRQzgZ1TumdUvqhubT7yQ5Yngc6NuH7W/gezs3N65LW8wmyA8daYNvc/NuB2zb2PSyhDj8Bri7Y5n/Mzf8hcEsDeR8Fvl7k+7si7WvrgD8DfQr3Y7IDq/fIDl6OBOYV7H/vpzL+CtwAdC6y/qHA34B2ubQ7+Pi7Ww38PDfvC8CfC/fBNH0R8Ms03YPs+7RzbtkBwLpSP9uIcAsq51ayVs7pFHTvke0IXYBnUjN4OfBwSkdSF0n/T1n32Argd0D31Bqr87fc9N+BUk5YXkl25N60o47MW7npv5IdCReb14usRffXguX75F4/FRHdI+vP/gTZF+LyIuu8GdhJ0tFF5i1MZfSIiEERcWcjdZ8MnCSpE1nr6eGIeAdA0mnKBrDUfQ57sWldNZ8GrsmVs5Tsh75PsYUj4qHIWkc9yFqkp5P9SDQm/z43Z90LNbp/kn32hfvDxhQuvw3QKyJWk51/+4qybuBTWP8ouRR17/HSXNrNZO/HT9M6iirhPaz/nkXE39PkdmTvwbKI+KBgu5pM0oGSHlfWJf4eWeu38HNs7PuXtwzYvkj6fpG1pDsBPwOeTN+HehGxmKyn5j8aKPvY9J37dEScFRGriizzSeCtiPiooL7570Gpv123AUcrG4xxEvBkRCzKzd+eLKCWzAEqiYi/kg2W+AJZF0Teu2TddgPTB949IrqlH2yAbwK7AwdGRFfg8JQuNkNELCE7OvvBJmTfJTf9KWBhvujc9LvAWrIf7PzyCxqo09vA/wAbBKGIWEvWFfADNmPbI+JJsu6C0WTdBFMAlJ0juhk4B+iZvsAvNrCuuh+iLrm0T+Sm3wL+Lfd5do+IzhHx+43U7aOImA48xsd9+g3dEqA+vYS6b85tBTa2fy5iw/1hYwqXX5vWA9kBxJeBkcDfI2JWE+t7HPAOqWsr/aD9BLiF7Jxoj2KZmvj5F1oE7CBp21xaKe9DMbeTtfh2iYhuZOcPC+vQ2Pcv73myFm1R6Tv1c6A/xc8h/YisO3H/kmq+oYXALlr/nHOD3//GRHauaxbZ53sqGx647AE815QyHaDWdyYwouAoi3R0cTNwtdIwSWVDoo9Ki2xP9gOxPH25NmlwQwN+DBzChoMSNuYCZYM3dgHOBe4qtlBErCM7Ip4kafv0I/DvbHgODgBJPcl2wIZGk91K1g00qon1LTSFrAXZnaxrDWBbsh/yxakuX6WBE7/p6HIB2ZF+lbKBAfmTuzcCl0gamMrqJunEYmVJGi1pTHo/JWkI2bm6uvMCb5Odv2vMxur+NtBXUoeNlLOBEvbPqcDpkvZM53lK2T+/klv+P4C7075CCkgfAf9FE1pPknaSdE5a/yW5o/ZrgGci4mvAg2SfTTElf/6F0gFoDXCZpA7pHGexln5hnTsVPET2fV8aER+mfeFLRbJ+L/WsDCQ7n1v0+0d27m5YI+uvSvlXkZ03Ltyu5WSfw4Ub25YGPE12MHehpG2UDU45Gmish6MxU1Jd9iY7
n5w3jKxrvWQOUDkR8VpEFJ78r3MR2QnCp1I33jSyVhNkR3+dyY4wnyLrXmmuOq0g68MuelTZiF+TnRCeQ/alv6WRZceT7aSvk53rup3svEWdutFl75Od6Fyc8hSr7zqyH6Cm1rfQFLIjubvqunwi4k9kX8ZZZD/oewP/10gZ/wpcQNYaGwjUt44i4l6yAHhn+jxfJDvPVMyyVNarZOcGbgN+FBG/TPNvAfZM3U73FSughLo/Rhb0/ybp3Q1L2KgG98+IeIhsH30sLfNYCeXdSnb+4W9k3UwTCuZPSdtQyh9dl0v6gOwc0xeAEyPiF5AFf7KDma+nZf8d2E/SlwsL2YTPv9CXyAZRLCXbRwu78gv1IQsM+ceuZOda/0PSSrKBKMX+cvAE2Xs9HbgqIn5bbAWRjYp9T9KBBbOeS9+3ZWSDOY6LiKUbFJC5huxcVZNFxBrgGLJ9/12yc1WnRcSfN6U8sqD0aeDe/IF+6p5s8l9F6k502xZEUpCdxJ7b2nWxLZOk04BxEXHYRhe2Rkn6HHBWRBzb2nVpDpJeI+s+n5ZLG0/WJdqklp4D1BbIAcrKKXX7PUY2em9jrRDbikg6nvSXh4KBF5vEXXxmVrJ0XmsxWRfb7a1cHasgkmaQjTg8uzmCE7gFZWZmFcotKDMzq0jNcRHLNqVXr17Rr1+/1q6GmZklzzzzzLsR0bswfasLUP369aOmpqGR5GZm1tIkFb2qh7v4zMysIjlAmVmjFi5cyH777UenTp2ora3ljTfeYOjQoRx++OF86UtfYt267D+iF154IYceeihDhw7l1VdfBeCkk05i2LBhHHbYYbzySnbR7qVLl3LSSScxYsQIJk1a/8LY7777LocccgjDhg3jmGOOYdWq7PJxhx9+OMOGDWPkyJG88847Lbj11qqacmXZLeGx//77h5mVbtWqVbF06dIYNmxYrF27NpYuXRrLli2LiIhvf/vbcf/998eSJUviiCOOiIiImTNnxnnnnRcREWvWrImIiBkzZsRZZ50VERETJkyIl19+uei6amtrY926dRERMXHixJg6dep65VRXV8cPf/jD8myotRqgJor8Xm9156DMrGk6depEp04fX0h7hx12qJ9u3749VVVVbLfddvTs2ZN169axfPlyevbsCcA222wDwPvvv88+++wDwIsvvsjll1/OW2+9xeWXX87BBx9cX15V1cc3AFi3bh0DBgxYr5xVq1YxcGCTbxzdqtauXcv8+fP58MMPW7sqra5Tp0707du3/vPcGAcoM9skCxcuZNq0aXzve9+jffv2fOYzn2H33XentraWWbOyC5yvWbOGESNGsHDhQu69N7t26O9//3v++Mc/0qNHD44//nhmzpy5XrmzZ8/mrLPOolOnTnzzm9nNaN98801OPvlkVq5cyUMPNel6o61u/vz5bL/99vTr1w9ps25w0KZFBEuWLGH+/Pn079+/pDw+B2VmTbZ69WrGjh3LzTffTPv27Xn55Zd58cUX+ctf/sLdd9/Nd77zHQA6dOjAzJkz+dWvfsWll2Y3oN5tt93YY4892GmnnWjXbsOfoCFDhlBTU8Nxxx3HL36RXbP4U5/6FLNmzeKyyy7jqquuarkNbQYffvghPXv23KqDE4Akevbs2aSWpAOUmTXZuHHjOOuss9hzzz2B7Oi4e/futGvXjl69evHee+8REaxduxaArl270rlzZyALUIsWLeKDDz6gtrZ2vXLXrFlTP12XZ+3atXV3ZF2vnLZkaw9OdZr6PriLz8watXbtWj7/+c/z3HPPcdRRR3HppZdyzz338Ne//pVrrrmGc889l+OOO47tt9+eoUOHUltbyzXXXMPq1asZNWoUkpDE9ddfD8Bll13GKaecwqpVq/j+97NbU1VXV7P33nuzbt06LrjgAtq1a0ePHj249dZbWbRoEaeeeirt2rWjY8eOVFdXt+K7YS1pq7sW3+DBg8N/1DWzlvLyyy+zxx5Nvd9o85k4cSLbbbcd
3/rWt1qtDnnF3g9Jz0TE4MJl3YIya6smdmvtGmz5Jr7X2jXYqvkclJnZFmbSpEnsvvvuHHnkkfV/kD7ggAOYMWMGAJdcckn9QJZK5haUmdkW5JlnnuHOO+/k2Wefpba2lv3224/999+f6upqTjjhBK699loefvhhnn766dau6kY5QJmZbUGefPJJjjvuOLp06QLAMcccA8DAgQM59dRTOfroo5k1axYdOnRozWqWxF18ZmZbmIaGc7/wwgt0796dt99+u4VrtGkcoMzMtiCHH3449957L6tWrWLlypX85je/AeCee+5hyZIl/O53v2PChAksX768dStaAgcoM7MtyH777cfJJ5/MoEGDOP744xk6dCgAF198Mbfccgu77bYb55xzDueee24r13Tj/D8os7bKw8zLrxmGmbf2/6AqTVP+B+UWlJmZVSQHKDMzq0hlC1CSfiHpHUkv5tJ+JOnPkp6XdK+k7rl5l0iaK+kVSUfl0veX9EKad63S8BRJHSXdldKfltSvXNtiZmYtr5wtqGpgVEHao8BeEbEP8BfgEgBJewJjgIEpzw2S6u5c9jNgHDAgPerKPBNYFhGfAa4GrizblpiZWYsrW4CKiN8BSwvSfhsRddfXfwrom6ZHA3dGxOqIeAOYCwyRtDPQNSJmpdsCTwGOzeWZnKbvBkaqocH/ZmbW5rTmOagzgLpbY/YB3srNm5/S+qTpwvT18qSg9x7Qs9iKJI2TVCOpZvHixc22AWZmVj6tcqkjSd8BaoFf1iUVWSwaSW8sz4aJETcBN0E2zLxJlTUza0b9Ln6wWcubd8UXN7rMdtttx/vvv1//urq6mpqaGq677jomTpzIzTffTO/evQEYNWoUV1xxRYNlFS4PMGPGDObMmcPo0aPrb+feq1cvpk2btqmbBbRCgJI0FvhnYGR8/Ces+cAuucX6AgtTet8i6fk88yW1B7pR0KVoZmYbd/755zfpflENLT906FAeeOCBZqtXi3bxSRoFXAQcExF/z826HxiTRub1JxsMMTsiFgErJR2Uzi+dBvw6l2dsmj4BeCy2tn8dm5mVSb9+/bjooosYMmQIQ4YMYe7cuS1eh3IOM78DmAXsLmm+pDOB64DtgUclzZF0I0BEvARMBf4EPAycHRHrUlHfAH5ONnDiNT4+b3UL0FPSXODfgYvLtS1mZm3ZqlWrGDRoUP3j0ksvXW/+1VdfXT/vkUceqU/v2rUrs2fP5pxzzuG8884ruvwRRxxRn/7kk0/Wp0+aNGmz6122Lr6IOKVI8i2NLD8J2GCLIqIG2KtI+ofAiZtTRzOzrUHnzp2ZM2dO/eu6c1B1GuqyO+WUU+qfzz///I0u36a7+MzMrO3I/3OnNf7F4wBlZmZF3XXXXfXPBx98cIuv33fUNTNrQaUMC68Uq1ev5sADD+Sjjz7ijjvuqE+/+uqrue222+pf33fffWVZv2+3YdZW+XYb5bcV326jX79+1NTU0KtXr2Yt17fbMDOzNs9dfGZmtoF58+a1dhXcgjIzs8rkAGVmZhXJAcrMzCqSA5SZmVUkD5IwM2tJzf33gBKGwm/u7Taqq6u54IIL6NOnT33a7bffTpcuXdhjjz3Yfffd69Nnz55Nhw4dNnerAAcoM7OtXmO326itzW6CfvLJJ3PdddetN2/evHnsuuuu613nrzk5QJmZ2XpOP/10evTowbPPPst+++3H3nvv3Sr1cIAyM9vC1d1uo87SpUs55phj6l/nL1105ZVXAvCXv/yFadOmUVVVRXV1NXfddRczZ86szzNr1iwAXnvttfqyDz30UK6//vpmq7cDlJnZFq6pt9u44447OPHEE6mqqqpPK9bFB5S1i8+j+MzMbAPbbrtta1fBAcrMzCqTu/jMzFpSM1whvTUUnoO64YYb+OQnP1nWdfp2G2ZtlW+3UX5b8e02ysW32zAzszbPAcrMzCqSA5SZWZltbadSGtLU98EBysysjDp16sSSJUu2+iAVESxZsoRO
nTqVnMej+MzMyqhv377Mnz+fxYsXt3ZVWl2nTp3o27dvycuXLUBJ+gXwz8A7EbFXSusB3AX0A+YBJ0XEsjTvEuBMYB0wISIeSen7A9VAZ+B/gXMjIiR1BKYA+wNLgJMjYl65tsfMbFNss8029O/fv7Wr0SaVs4uvGhhVkHYxMD0iBgDT02sk7QmMAQamPDdIqrvGxs+AccCA9Kgr80xgWUR8BrgauLJsW2JmZi2ubAEqIn4HLC1IHg1MTtOTgWNz6XdGxOqIeAOYCwyRtDPQNSJmRdaBO6UgT11ZdwMjJakc22JmZi2vpQdJ7BQRiwDS844pvQ/wVm65+SmtT5ouTF8vT0TUAu8BPYutVNI4STWSatwPbGbWNlTKKL5iLZ9oJL2xPBsmRtwUEYMjYnDdXSPNzKyytXSAejt125Ge30np84Fdcsv1BRam9L5F0tfLI6k90I0NuxTNzKyNaukAdT8wNk2PBX6dSx8jqaOk/mSDIWanbsCVkg5K55dOK8hTV9YJwGOxtf/RwMxsC1LOYeZ3AMOBXpLmA98HrgCmSjoTeBM4ESAiXpI0FfgTUAucHRHrUlHf4ONh5g+lB8AtwK2S5pK1nMaUa1vMzKzllS1ARcQpDcwa2cDyk4BJRdJrgL2KpH9ICnBmZrblqZRBEmZmZutxgDIzs4rkAGVmZhXJAcrMzCqSA5SZmVUkBygzM6tIDlBmZlaRHKDMzKwiOUCZmVlFcoAyM7OK5ABlZmYVyQHKzMwqkgOUmZlVJAcoMzOrSA5QZmZWkRygzMysIjlAmZlZRXKAMjOziuQAZWZmFckByszMKlJJAUrSYZK+mqZ7S+pf3mqZmdnWbqMBStL3gYuAS1LSNsBt5ayUmZlZKS2o44BjgA8AImIhsH05K2VmZlZKgFoTEQEEgKRtN3elks6X9JKkFyXdIamTpB6SHpX0anreIbf8JZLmSnpF0lG59P0lvZDmXStJm1s3MzOrDKUEqKmS/h/QXdK/AtOAmzd1hZL6ABOAwRGxF1AFjAEuBqZHxABgenqNpD3T/IHAKOAGSVWpuJ8B44AB6TFqU+tlZmaVZaMBKiKuAu4G/gfYHbg0In66mettD3SW1B7oAiwERgOT0/zJwLFpejRwZ0Ssjog3gLnAEEk7A10jYlZq4U3J5TEzszaufSkLRcSjwKPNscKIWCDpKuBNYBXw24j4raSdImJRWmaRpB1Tlj7AU7ki5qe0tWm6MN3MzLYApYziWylpRXp8KGmdpBWbusJ0bmk00B/4JLCtpK80lqVIWjSSXmyd4yTVSKpZvHhxU6tsZmatoJQuvu0jomt6dAKOB67bjHUeCbwREYsjYi1wD3AI8HbqtiM9v5OWnw/sksvfl6xLcH6aLkwvtg03RcTgiBjcu3fvzai6mZm1lCZfSSIi7gNGbMY63wQOktQljbobCbwM3A+MTcuMBX6dpu8HxkjqmP4gPACYnboDV0o6KJVzWi6PmZm1cRs9ByXpX3Iv2wGDaaArrRQR8bSku4E/ArXAs8BNwHZkIwbPJAtiJ6blX5I0FfhTWv7siFiXivsGUA10Bh5KDzMz2wKUMkji6Nx0LTCP7BzSJouI7wPfL0heTdaaKrb8JGBSkfQaYK/NqYuZmVWmjQaoiPhqS1TEzMwsr8EAJemnNNKVFxETylIjMzMzGm9B1bRYLczMzAo0GKAiYnJD88zMzMqtlD/q9pZ0laT/lfRY3aMlKmfWFFOmTGHkyJEMHz6cBQsWMH78eIYPH84ZZ5zBunXZwM/Ro0fTvXt3pk2bVp/v1ltv5eCDD2bUqFH87W9/W6/M2tpaxowZwxFHHMGFF15Yn/6jH/2Iww47jC9/+cusXbu2ZTbQbCtTyv+gfkn2P6X+wGVko/j+UMY6mTXZggULeOKJJ5g+fTozZsxg4cKFrFmzhhkzZjBw4EAeeOABAG688UbOO++8+ny1tbXccMMNzJw5k0mTJnHllVeuV+69997Lvvvuy+OPP86q
Vat47rnnWLx4MY8//jgzZ85kn3324b777mvBLTXbepQSoHpGxC3A2oh4IiLOAA4qc73MmuSRRx5h3bp1jBw5kvHjx/P666+zzz77ADBo0CBmzZoFwM4777xeviVLltC3b1+qqqrYd999eeqpp9abX6yc2bNnM3z4cACOPPLIDfKYWfMoJUDV9V8skvRFSZ9l/UsMmbW6t99+mzVr1jB9+nS6dOlS36ICeOyxx1i2bFnRfL169eKNN97ggw8+4PHHH2fp0qXrzd99993ry3n88cdZtmwZy5cvp2vXrgB069atwbLNbPM0GKAkbZMm/1NSN+CbwLeAnwPnt0DdzErWrVs3hg0bBsCIESNYtWoVe+21F0cccQQrVqxgp512KpqvqqqKSy+9lC984Qs8+OCD7LbbbuvNP/roo1m1ahUjR46kY8eO7LTTTnTv3p0VK7LrJa9YsYLu3buXddvMtlaNtaAWSLoZ+DuwIiJejIgjImL/iLi/hepnVpJDDjmE559/HoA5c+bQv39/Lr30Uh5//HF69uzJF7/4xQbzHnPMMTzxxBMce+yxDB06dL15VVVV/PSnP2X69OlUVVXxuc99jgMOOKC+VTVt2jQOOsg93mbl0FiA2oPsv1DfA96S9BNJB7ZMtcyaZtCgQXTu3Jnhw4fzhz/8gRNOOIHhw4czcuRIOnTowIEHZrvuhAkTmDJlChdeeCE33XQTAOPHj2fEiBFMnjyZ8ePHA3DFFVewYMECFixYwPDhwxkxYgSHHHIIffv2Zccdd+Twww/nsMMOY86cORx77LGttdlmWzRlN6PdyELSJ8ku3joG2JHsDrffKXPdymLw4MFRU+P/INsWYGK31q7Blm/ie61dg62CpGciYnBheql31F0o6RZgGfDvwNeANhmgrPz6Xfxga1dhqzCvU2vXwKy8Gh3FJ6mTpBMl3QO8Rna18UvI7oRrZmZWNo1dLPZ2srvf/g64HfhSRHzYUhUzM7OtW2NdfI8A/xYRK1uqMmZmZnV8sVgzM6tIpVxJwszMrMU5QJmZWUUqaZi5pEOAfvnlI2JKmepkZma28QAl6VZgV2AOsC4lB+AAZWZmZVNKC2owsGeUcskJMzOzZlLKOagXgU+UuyJmZmZ5pbSgegF/kjQbWF2XGBHHlK1WZma21SslQE1s7pVK6k52X6m9yM5nnQG8AtxFNhhjHnBSRCxLy18CnEl2DmxCRDyS0vcHqoHOwP8C57or0sxsy7DRABURT5RhvdcAD0fECZI6AF2AbwPTI+IKSRcDFwMXSdqT7CrqA8muAThN0m4RsQ74GTAOeIosQI0CHipDfc3MrIVt9ByUpIMk/UHS+5LWSFonacWmrlBSV+Bw4BaAiFgTEcuB0UDd1SsmA8em6dFkt/dYHRFvAHOBIZJ2BrpGxKzUapqSy2NmZm1cKYMkrgNOAV4l60r7WkrbVP8ALAb+W9Kzkn4uaVtgp4hYBJCed0zL9wHeyuWfn9L6pOnC9A1IGiepRlLN4sWLN6PqZmbWUkq6kkREzAWqImJdRPw3MHwz1tke2A/4WUR8FviArDuvISpWpUbSN0yMuCkiBkfE4N69eze1vmZm1gpKCVB/T+eJ5kj6oaTzgW03Y53zgfkR8XR6fTdZwHo7dduRnt/JLb9LLn9fYGFK71sk3czMtgClBKhT03LnkLV2dgGO39QVRsTfgLck7Z6SRgJ/Au4Hxqa0scCv0/T9wBhJHSX1BwYAs1M34Mp0jkzAabk8ZmbWxpUyiu+vkjoDO0fEZc203vHAL1PL7HXgq2RBcKqkM4E3gRPT+l+SNJUsiNUCZ6cRfADf4ONh5g/hEXxmZluMUq7FdzRwFdAB6C9pEPAfm/NH3YiYQ3YJpUIjG1h+EjCpSHoN2X+pzMxsC1NKF99EYAiwHOqDS79yVcjMzAxKC1C1EfFe2WtiZmaWU8qljl6U9CWgStIAYALw+/JWy8zMtnaltKDGk11maDVwB7ACOK+MdTIzMytp
FN/fge+kh5mZWYtoMEBJur+xjL7dhpmZlVNjLaiDya6BdwfwNMUvLWRmZlYWjQWoTwD/RHah2C8BDwJ3RMRLLVExMzPbujU4SCJdGPbhiBgLHER2m4sZksa3WO3MzGyr1eggCUkdgS+StaL6AdcC95S/WmZmtrVrbJDEZLLLCD0EXBYRL7ZYrczMbKvXWAvqVLKrl+8GTMguGA5kgyUiIrqWuW5mZrYVazBARURJNzM0MzMrBwchMzOrSA5QZmZWkRygzMysIjlAmZlZRXKAMjOziuQAZWZmFckByszMKpIDlJmZVSQHKDMzq0gOUGZmVpFaLUBJqpL0rKQH0usekh6V9Gp63iG37CWS5kp6RdJRufT9Jb2Q5l2r3AUDzcysbWvNFtS5wMu51xcD0yNiADA9vUbSnsAYYCAwCrhBUlXK8zNgHDAgPUa1TNXNzKzcWiVASepLdp+pn+eSRwOT0/Rk4Nhc+p0RsToi3iC7ceIQSTsDXSNiVkQEMCWXx8zM2rjWakH9BLgQ+CiXtlNELAJIzzum9D7AW7nl5qe0Pmm6MN3MzLYALR6gJP0z8E5EPFNqliJp0Uh6sXWOk1QjqWbx4sUlrtbMzFpTa7SgDgWOkTQPuBMYIek24O3UbUd6fictPx/YJZe/L7Awpfctkr6BiLgpIgZHxODevXs357aYmVmZtHiAiohLIqJvRPQjG/zwWER8BbgfGJsWGwv8Ok3fD4yR1FFSf7LBELNTN+BKSQel0Xun5fKYmVkb19gt31vaFcBUSWcCbwInAkTES5KmAn8CaoGzI2JdyvMNoBroDDyUHmZmtgVo1QAVETOAGWl6CTCygeUmAZOKpNcAe5WvhmZm1lp8JQkzM6tIDlBmZlaRHKDMzKwiOUCZmVlFcoAyM7OK5ABlZmYVyQHKzMwqkgOUmZlVJAcoMzOrSA5QZmZWkRygzMysIjlAmZlZRXKAMjOziuQAZWZmFckByszMKpIDlJmZVSQHKDMzq0gOUGZmVpEcoMzMrCI5QJmZWUVygDIzs4rkAGVmZhXJAcrMzCqSA5SZmVWkFg9QknaR9LiklyW9JOnclN5D0qOSXk3PO+TyXCJprqRXJB2VS99f0gtp3rWS1NLbY2Zm5dEaLaha4JsRsQdwEHC2pD2Bi4HpETEAmJ5ek+aNAQYCo4AbJFWlsn4GjAMGpMeoltwQMzMrnxYPUBGxKCL+mKZXAi8DfYDRwOS02GTg2DQ9GrgzIlZHxBvAXGCIpJ2BrhExKyICmJLLY2ZmbVyrnoOS1A/4LPA0sFNELIIsiAE7psX6AG/lss1PaX3SdGF6sfWMk1QjqWbx4sXNug1mZlYerRagJG0H/A9wXkSsaGzRImnRSPqGiRE3RcTgiBjcu3fvplfWzMxaXKsEKEnbkAWnX0bEPSn57dRtR3p+J6XPB3bJZe8LLEzpfYukm5nZFqA1RvEJuAV4OSJ+nJt1PzA2TY8Ffp1LHyOpo6T+ZIMhZqduwJWSDkplnpbLY2ZmbVz7VljnocCpwAuS5qS0bwNXAFMlnQm8CZwIEBEvSZoK/IlsBODZEbEu5fsGUA10Bh5KDzMz2wK0eICKiJkUP38EMLKBPJOASUXSa4C9mq92ZmZWKXwlCTMzq0gOUGZmVpEcoMzMrCI5QJmZWUVygDIzs4rkAGVmZhXJAcrMzCqSA5SZmVUkBygzM6tIDlBmZlaRHKDMzKwiOUCZmVlFcoAyM7OK5ABlZmYVyQHKzMwqkgOUmZlVJAcoMzOrSA5QZmZWkRygzMysIjlAmZlZRXKAMjOziuQAZWZmFckByszMKpIDlJmZVaQ2H6AkjZL0iqS5ki5u7fqYmVnzaNMBSlIVcD3weWBP4BRJe7ZurczMrDm06QAFDAHmRsTrEbEGuBMY3cp1MjOzZtC+tSuwmfoAb+VezwcOLFxI0jhgXHr5vqRXWqBuZmUl6AW829r12KJdptauwdbi08US23qAKrb3xAYJETcBN5W/OmYt
R1JNRAxu7XqYlUtb7+KbD+ySe90XWNhKdTEzs2bU1gPUH4ABkvpL6gCMAe5v5TqZmVkzaNNdfBFRK+kc4BGgCvhFRLzUytUyaynutrYtmiI2OGVjZmbW6tp6F5+ZmW2hHKDMzKwiOUCZlYmkkPRfudffkjRR0nckzUmPdbnpCc2wztMlLU7lvSTpbkld0ryJkv4uacfc8u/npuvq8qKkX9XlM2stDlBm5bMa+BdJvfKJETEpIgZFxCBgVd10RFxbSqGSNja46a5U3kBgDXBybt67wDcbyFdXl71Svq+XUh+zcnGAMiufWrKRdueXmkFStaQbJT0p6S+S/jmln55aNb8Bfiuph6T7JD0v6SlJ+xQpqz2wLbAsl/wL4GRJPTZSlSeBz5Rab7NycIAyK6/rgS9L6taEPP2AYcAXgRsldUrpBwNjI2IEcBnwbETsA3wbmJLLf7KkOcACoAfwm9y898mC1LkNrTwFts8DLzShzmbNzgHKrIwiYgVZ8GjK+aWpEfFRRLwKvA78Y0p/NCKWpunDgFvTOh4DeuaC4F2p+/ATZEHmgoLyrwXGSupakN45BbYa4E3glibU2azZOUCZld9PgDPJuttKUfjnxLrXH+TSNnodysj+5Pgb4PCC9OXA7cBZBfnz58PGpzsEmLUaByizMkutnqlkQaoUJ0pqJ2lX4B+AYlff/x3wZQBJw4F3U2ut0GHAa0XSfwz8G238ajK2ZXOAMmsZ/0V2e4xSvAI8ATwEfD0iPiyyzERgsKTngSuAsbl5J6fh4s8DnwV+UJg5It4F7gU6lrwFZi3MlzoyqyCSqoEHIuLu1q6LWWtzC8rMzCqSW1BmZlaR3IIyM7OK5ABlZmYVyQHKzMwqkgOUWRnkrxJewrITJX2rhOXyVxv/jaTum1XJDcufJ6mXpO6SCv/Ea9biHKDM2o781caXAmeXaT3d2fAqE2YtzgHKrIVIOlrS05KelTRN0k652ftKekzSq5L+tYTiZgF9Urm7SnpY0jPpKuj/mNJPTK2t5yT9LqWdLum6XJ0eSFeiyLsC2DW11n60GZtstll8mROzljMTOCgiQtLXgAv5+N5M+wAHkV2v71lJD0bEwmKFSKoCRvLxxVxvIrvixKuSDgRuAEYAlwJHRcSCJnYHXgzslS44a9ZqHKDMWk5f4C5JOwMdgDdy834dEauAVZIeB4YA9xXkr7vaeD/gGeBRSdsBhwC/kuqvH1t3+aL/A6olTQXuafatMSszd/GZtZyfAtdFxN5kF2rtlJvX0BXM81alVs2nyQLc2WTf4eW5q5APiog9ACLi68B3gV2AOZJ6kt1EMf+9z9fBrKI4QJm1nG5kNxGE9S/uCjBaUqcURIYDf2iokIh4j+z+Ut8CVgFvSDoRQJl90/SuEfF0RFxKdqv3XYB5wKB0tfRdyFpqhVYC22/aJpo1H3fxmZVHF0nzc69/THYF8l9JWgA8BfTPzZ8NPAh8CvhBQ+ef6kTEs5KeA8aQ3XbjZ5K+C2wD3Ak8B/xI0gCye0dNT2mQdS2+ALwI/LFI2Usk/Z+kF4GHIqLwhodmLcLX4jMzs4rkLj4zM6tIDlBmZlaRHKDMzKwiOUCZmVlFcoAyM7OK5ABlZmYVyQHKzMwq0v8PCE1VrA41uxwAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "\n",
    "# Calculate the mean values for 'bnp' only\n",
    "mean_bnp_values = hf_ce_lab[hf_ce_lab['label_lab'].str.contains('pro')].groupby(['dx', 'label_lab'])['valuenum'].mean().reset_index()\n",
    "\n",
    "# Create a bar plot for 'bnp'\n",
    "plt.figure(figsize=(10, 6))\n",
    "ax = mean_bnp_values.pivot(index='label_lab', columns='dx', values='valuenum').plot(kind='bar', rot=0)\n",
    "plt.xlabel('Lab Result')\n",
    "plt.ylabel('Mean Value')\n",
    "plt.title('Mean NTproBNP Value Stratified by Dx and Lab (BNP only)')\n",
    "plt.legend(title='dx')\n",
    "\n",
    "# Add direct labels (values) on top of the bars\n",
    "for i, bar in enumerate(ax.patches):\n",
    "    height = bar.get_height()\n",
    "    plt.text(\n",
    "        bar.get_x() + bar.get_width() / 2, height + 0.1, f'{mean_bnp_values.iloc[i, 2]:.2f}',\n",
    "        ha='center', va='bottom', fontsize=8, color='black'\n",
    "    )\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 202,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Figure size 720x432 with 0 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAa4AAAEYCAYAAAAEZhLyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAA04UlEQVR4nO3deZwU1bn/8c/jgIyoMLJkRFGHoCIqyyW4gECGRWNURI07GnCJGqNGb1TgZ6KYhHs1mkiuy01ccgF3o4KiwYVlQAwgEAiixLih4oKIIC4oi8/vj3Nm6Gm6Z3q27mn4vl+veU0tp6uequqqp5bTp8zdERERyRc75DoAERGRmlDiEhGRvKLEJSIieUWJS0RE8ooSl4iI5BUlLhERyStKXLLdMLPhZjY713HUFzP7k5n9KqH/p2a20sy+MLPW8f93azltN7N904wrM7Pzaxt3vjCz0WZ2X67jkK3lVeIys+VmtsHM2iQNXxx3tJIsx1Ma5/t40vBucXhZNuPJF3G9rajF5/Y0s01m1jHFuIlmdnP9RFhzZnaemf3LzD6PyeNpM9s1jhtnZr+t4/S3SrrufpG7/yaObwr8ATjK3Xdx99Xx/1t1mW9DMrOSuJ98Ef9WmtlTZnZkrmOrjvb92qntvp8srxJX9DZwRnmPmXUBdspdOKwCeptZ64Rhw4B/5yielMysSa5jqCt3fx+YBpydONzMWgHHAONzEZeZfR/4L+AMd98V6Aw8UoPP18e2KQYKgVfqYVrZVuTuuwDdgOeBiWY2PLchZUT7fq64e978AcuBXwLzE4bdDFwDOFAShzWLw98FVgJ/AnaK43YDniJ86dbE7vYJ0ysDfgO8CHwOPAe0SRNPKbAiTv9ncVhBHHYtUJZQ9gDCTvkp8BpwasK4Y4FFwDrgPWB0wrhC4D5gNbAWmA8UJ6yPQQllRwP3xe6SuE7Oi+thVhx+LrAsLvuzwD4Jn3fgYuD1uOy/AToCc2JsjwA7JpQ/Dlgc4/o70DVpW10JLAE+Ax6Oy7IzsB74Fvgi/u0BHAosiPNZCfwhzTo/E3gzadjFwD9i90jgzRj/q8CJCeWGA7OT1k+TpG1/fkJ/2nWVNP8rgUlpxl0AbAQ2xGWdnLB+RsT18w3QJF3shET4NbA5TmNtHD4O+C2wP/BlXJ4vgOkJ23Pf6vaJOP4q4EPgg7jcFZ9NsUxlwH8DL8Vt+wTQKo57Grg0qfwS4IQU09lqGySsz5WEE+uOhH2mRxy3B/AJUJomtmq3f1wPawgnwT9MGN8BmBk/+zxwG3F/0r6f+32/0vqvrkBj+ivfWHHjd45flPeAfaicuMYCTwKtgF2BycB/x3GtgR8BzeO4v5Jw0CHslG8SDgY7xf4bqvny9gbmxWHHxC/F+eVf3rjB3gPOIRygehB2voMSptOFsKN2jRvvhDjuwhh/87i83wNa1ODLOyHOfyfgBOCNuO6aEE4C/p705X0SaAEcRDigTgO+C7QkHAiGxbI9gI+Bw2Jcw2I8zRJieyl+MVsRdpiLEtdb0rqcA5wdu3cBDk+zznci7Ax9kj57eew+Jc5zB+A0wgG9XeKBK91Bk4TEVd26SoqpL2GHvB44onwdJIwfB/w2xXd5MbAXW06qMoo91XTTLE9i4hpL+n3iaMJ37mDCd+UBqk9c7yeUf4wt37tTiftC7O9GOPDumGI6W8Uch383Du8c+39C+P40J+xbN1dxjKhuHW6M0ysAfkpI1JbwPfoDIcn3IxzAq0tc2veztO9X+kxNk0cu/9iSuH5JOOM7mnAm0ySu+BLA4pe1Y8LnegFvp5lmd2BN0k75y4T+i4Fnqvryxu7XgU7AQ8DQpC/vacALSZ/9M3BdmumOBW6J3eeSdEaTvD6q+fJ+N2H8FOC8hP4dgK+IZ16x/BEJ4xcCIxL6fw+Mjd3/C/wmKZ7XgO8nxHZWwrjfAX+q4ss7i3DgT3l1m1T2buDO2L0f4WrmO2nKLgaG
xO7hZJ64qlxXKebzQ8JBZi3hTPIPQEEcN47UievcapYzZewJ4yumm2Z5HNiXavYJ4C8knJwRTtqqS1yJ5Q+M26CAcND/FNgvjrsZuCPNdLaKOQ4vZOvv4pPAy4Sz+GapppfhOnwjYVzzOJ/dgb2BTcDOCeMfoJrE5dr3s7rvl//l4zMugHsJt4yGE84qErUlfCEXmtlaM1sLPBOHY2bNzezPZvaOma0jrLQiMytImMZHCd1fEc4CMonpEqA/MDFp3D7AYeXxxJiGEnYYzOwwM5thZqvM7DPgIqBNwnSfBR4ysw/M7HfxQXym3kuK448JMXxKOKjtmVBmZUL3+hT95etiH+AXScu0F+Esq1xN1uN5hAPmv8xsvpkdV0XZ8cCpZlZIeN71jLt/DGBmP46VdcpjOpgt67ImMllXFdx9irsPJpxhDiF8N6ureZe4beoz9mRV7hOEbZYYyzsZTDO5fFPCgecbwm2ls8xsB8Lz6HtrGG/5Ov40YdhdhPVxa5xHShmsw4rvpLt/FTt3IayDNe7+ZdJyZUL7fvb2fSBcqeQdd3/HzN4mXJqflzT6E8JKPsjDw/xkvyCcHR3m7h+ZWXfCPWarY1j3Ei7FJ7j7V2aVJvceMNPd09WWeoBwP/2H7v61mY0lfnndfSPhbOT6WGvyb4Szm3sIZ9HNE6aze4ppe1IcY9z9/potWkrl0xpTi8/6VgPcXwfOiAe7k4BHzax10oGkvOwLZraakCDOAq4GMLN9CAe4gcAcd99sZotJvW3Lp9uccG8dKq+/Wq0rd/8WmGZm0wkHzZTLmzw8g9jTTSMT1e0THxIOPOX2zmCayeU3xvlAOLG4l/A86St3n1PDeE8k3Ip6DcDMdiFcidwDjDazx9z90+QP1XD7J/sQ2M3Mdk74zu1NZutd+37m6rTvl8vXKy4ICWtA8sLFA8ddwC1m9h2oqEb9g1hkV8JOvDbWRruuPoJx97eB7xMqiiR7CtjfzM42s6bx7xAz65wQ06fxi3so4WqSGHt/M+sSrwjXEQ4Qm+PoxcDpcXo9gZOrCfNPwCgzOyhOu6WZnVK7JeYu4KJ4xmhmtrOZHWuxCng1VgKtzaxl+QAzO8vM2sbttzYO3pzqw9EE4EagiHCLDsL9fCdUvMHMzmFL8qjE3VcRntOcZWYFZnYu4WF0uYzXlZkNMbPTzWy3uC4OJXwX5iYsb3W/p6ou9pVAezPbsZrpbCWDfeIRYLiZHWhmzclsnzgrofyvgUfdfXOc3xzCA/jfU4OrLTMrNrNL4vxHxbgB/ggsdPfzCZU//pRmEhlv/2Tu/g6hgsD1ZrajmfUBBmf4We372d338zdxufub7r4gzegRhDOguRZuB04lXGVBOHPbiXB2OJdwy6S+Yprt7h+kGP45cBRwOuFh8EeEg26zWORi4Ndm9jmhRlJiVerdgUcJX9xlhFpP5T+K/BXhYLuGcGb2QDXxTYzzfSiul6WEZzM1Ftf9Twhni2sI63t4hp/9F/Ag8Fa81bAH4XnlK2b2BeFAdbq7f13FZCYQzogfLr915O6vEg6Wcwg7SBdC7dB0fkKoTbea8ED67wkx1mRdrYnTep2wne4Dbko4u70HODAu66RUE8gg9umEqu4fmdknW0+hWmn3CXefQtgvpscy0zOY3r2EZ2wfEZ5JXZY0fkJchkx+wLvWzL4kPMM6BjjF3f8C4aSA8N24KJb9T6CHmQ1Nnkgttn+yMwkVDj4lJM/kxxBpad/P6r5fUZtGRKTemNmPgQvcvU+uY5FtT95ecYlI4xRvH14M3JnrWGTbpMQlIvUmPjdbRbhVV+XtK5Ha0q1CERHJK7riEhGRvJKXv+Mq16ZNGy8pKcl1GCIiUgcLFy78xN3bVl8yyOvEVVJSwoIF6WrEi4hIPjCzTFspAXSrUKRGPvjgA3r06EFhYSGbNm3i7bffpm/fvvTr148zzzyTzZvD7yaHDBlCUVERU6dO3Woan332GccddxylpaWMHTu2YviE
CRMYOHAgpaWlvP9+qgYuRASUuERqpFWrVkybNo3DDz8cgKKiIiZPnsysWbPo0KEDf/vb3wD405/+xOWXX55yGnfeeSdDhw6lrKyMWbNm8cknn/D+++8zc+ZMpk2bRllZGXvumbJJRBFBiUukRgoLC9ltt90q+nfbbTeKiooAaNKkCQUFoa3mdu3apZ3GW2+9RdeuXQE46KCDWLBgAc8++yybN29m4MCBXHrppRVXbiKytbx+xiXSWHzwwQdMnTqVX/3qV9WW7dSpEzNnzuSAAw5g1qxZHHjggaxcuZINGzYwbdo0RowYwRNPPMFJJ52UhciloW3cuJEVK1bw9ddVtmK0XSgsLKR9+/Y0bVqTRu63psQlUkfffPMNw4YN46677qJJk+p3qZ/85CdceOGFTJo0iT322IPi4mLWrFnD97//fQAGDBigSkfbkBUrVrDrrrtSUlJCUsvx2xV3Z/Xq1axYsYIOHTrUaVq6VShSRxdccAEXX3wxBx54YEbld955Z+677z6mTJnCt99+y+GHH07v3r1ZsmQJAIsXL67zji2Nx9dff03r1q2366QFYGa0bt26Xq48lbhEamDjxo0MGjSIf/7zn/zgBz9g5syZPP744/zxj3+ktLSUiRPDewQvu+wyJkyYwNVXX82dd4Ym+y6//HI2b97MwoUL6d+/P0ceeSTnnHMOzZs3p3v37uy0006UlpYyf/58Tj65urdUSD7Z3pNWufpaD3nd5FPPnj1dt1REpDFbtmwZnTt3rr7gdiLV+jCzhe7eM9Np6IpLRGQbNnr0aG6++eZch1GvGqxyhpn9BTgO+NjdD47DbiK8VXQD8CZwjruvjeNGEd5qvBm4zN2fbajYRGqqZOTTOZnv8huOzcl8RRqzhrziGkd4s2Wi54GD3b0r8G9gFICZHUh4Q+hB8TN3xNdVi4hIDY0ZM4ZOnToxaNAgXnvtNQAOOeQQysrKABg1ahTXXHNNDiOsmwZLXO4+i/AK7MRhz7n7ptg7F2gfu4cAD7n7N+7+NuFV0Ic2VGwi+Si5uamNGzfSq1cvdtllF954442Kcp06daK0tJTS0lJeffXVStMYN25cxbjddtuNxYsXM2/ePHr37k3fvn254oorsr1YUs8WLlzIQw89xKJFi3j88ceZP38+ELb9T3/6U55//nmeeeYZrrvuuhxHWnu5/B3XucDDsXtPQiIrtyIO24qZXQBcALD33ns3ZHwijUp5c1MnnngiEFrqmDRpEiNGjKhUrm3bthVn1smGDx/O8OHD2bRpEz179qRbt26sXLmS6dOnU1hYyNChQ3n55Zfp0qVLQy+ONJAXXniBE088kebNmwNw/PHHA6GVlrPPPpvBgwczZ84cdtxxx1yGWSc5qZxhZtcAm4D7ywelKJayuqO73+nuPd29Z9u2GbeCL5L3kpubMjOKi4u3Kvfpp5/Sr18/LrzwwrS/mZk1axb9+vXDzNh9990pLCwEKjdbJfkrXbXzl19+maKiIlauXJnliOpX1hOXmQ0jVNoY6lvq4q8A9koo1h74INuxiWwLZs+ezaxZs9hnn30qfkOW7PHHH6+4ciu3ZMkSPvnkk4x/SC2NU79+/Zg4cSLr16/n888/Z/LkyUDY5qtXr2bWrFlcdtllrF27NreB1kFWE5eZHQ2MAI53968SRj0JnG5mzcysA7Af8FI2YxPZVrRq1QqAE088kaVLl2413t2ZPXs2/fr1qxj26aefcskll3DPPfdkLU5pGD169OC0006je/fu/OhHP6Jv374AjBw5knvuuYf999+fSy65hJ///Oc5jrT2GrI6/INAKdDGzFYA1xFqETYDno+XsnPd/SJ3f8XMHgFeJdxC/Jm7q3lskRrasGED7k6zZs148cUX6dix41Zl5s+fT48ePSpuCW7atImzzjqLm266id133z3bIUsDuOaaa7aqNXjllVdWdF922WXZDqleNWStwjPcvZ27N3X39u5+j7vv6+57uXv3+HdRQvkx
7t7R3Tu5+5SGikskXyU3NzVv3jxOPfVUnnvuOYYNG8YTTzzBmjVr6NWrF/369WPy5MlcfPHFwJbmpgAmTpxYqeX5v/71r8yfP58RI0ZQWlrKnDlzcrJ8IplSk08iGdAPkKW21ORTZWrySUREtjt6H5dIYza6ZQ7n/Vnu5i1SBV1xiYhIXlHiEhGRvKJbhSIiWVTfFX0yqcCzyy678MUXX1T0jxs3jgULFnDbbbcxevRo7rrrLspbIjr66KO54YYb0k4ruTxAWVkZixcvZsiQIRVv727Tpg1Tp06t7WJVSYlLRGQ7d8UVV1T6nVdty/ft25ennnqqPkNLSbcKRURkKyUlJYwYMYJDDz2UQw89tNIbCHJNiUtEZBu3fv16unfvXvF37bXXVhp/yy23VIx79tkt7/Bt0aIFL730EpdccgmXX355yvL9+/evGP7CCy9UDB8zZkyDLY9uFYqIbON22mknFi9eXNFf/oyrXLpbf2eccUbF/8R3telWoYiINEqJr0dJ96qUXFDiEhGRlB5++OGK/7169cpxNFvoVqGISBblU/uT33zzDYcddhjffvstDz74YMXwW265hfvuu6+if9KkSVmNS43simQgZ43sFp6Zk/kCavKpnuRrI7slJSUsWLCANm3a1Ot01ciuiIhsd3SrUEREtrJ8+fJch5CWrrhERCSvKHGJiEheUeISEZG8osQlIiJ5RZUzRESyqb7fap3Bzxbq+lqTcePGcdVVV7HnnntWDHvggQdo3rw5nTt3plOnThXDX3rpJXbccce6LlWVlLhERLZzVb3WZNOmTQCcdtpp3HbbbZXGLV++nI4dO1ZqBzEblLhERKSS4cOH06pVKxYtWkSPHj3o0qVLrkOqRIlLRGQbV/5ak3Kffvopxx9/fEV/YhNON954IwD//ve/mTp1KgUFBYwbN46HH36Y2bNnV3xmzpw5ALz55psV0z7iiCO4/fbbG3hplLhERLZ5NX2tyYMPPsgpp5xCQUFBxbBUtwqBnNwqbLBahWb2FzP72MyWJgxrZWbPm9nr8f9uCeNGmdkbZvaamf2goeISEZHq7bzzzrkOIa2GrA4/Djg6adhIYJq77wdMi/2Y2YHA6cBB8TN3mFkBIiIiSRrsVqG7zzKzkqTBQ4DS2D0eKANGxOEPufs3wNtm9gZwKDCnoeITEcmJPG11P/kZ1x133MEee+yRk1ga9LUmMXE95e4Hx/617l6UMH6Nu+9mZrcBc939vjj8HmCKuz+aYpoXABcA7L333t975513Gix+kXJ6rYnUVr6+1qShbEuvNUn1TuiUGdXd73T3nu7es/wHcyIisv3IduJaaWbtAOL/j+PwFcBeCeXaAx9kOTYREckD2U5cTwLDYvcw4ImE4aebWTMz6wDsB7yU5dhERBpEPr9pvj7V13poyOrwDxIqV3QysxVmdh5wA3Ckmb0OHBn7cfdXgEeAV4FngJ+5++aGik1EJFsKCwtZvXr1dp+83J3Vq1dTWFhY52k1ZK3CM9KMGpim/BhgTEPFIyKSC+3bt2fFihWsWrUq16HkXGFhIe3bt6/zdNRyhohIA2ratCkdOnTIdRjblMZSq1BERCQjSlwiIo3Upk2bOP300+nfvz9XX301ADfddBN9+vRh6NChbNy4sVL5cePG0alTJ0pLSyvKL1q0iC5dulBSUpLt8BuMEpeISCM1ceJEunXrxowZM1i/fj0zZ85kxowZzJ49m65duzJp0qStPnPVVVdRVlbG7373OwD23Xdf5s6dWy/PlhoLJS4RkUbqrbfeomvXrgB0796dpUuXUlpaCsCgQYOYO3fuVp8ZO3Ys/fr1Y9q0aQDsuuuujbrB3NpQ4hIRaaQ6derEzJkzAZgxYwZr166lRYsWALRs2ZI1a9ZUKn/CCSewZMkSHnvsMa688ko2b942f1WkxCUi0kgNHjyY9evXM3DgQJo1a0ZRURHr1q0DYN26dRQVFVUqX1RUxA477EDbtm3Zf//9Wbly
ZQ6ibnhKXCIiVfjqq6849thjKS0tZciQIcybN4/S0lJKS0vp0KEDY8eOrVT+3XffZcCAAfTr149HHnkEqH0FiYKCAm699VamTZtGQUEBgwcPrrgCmzp1Kocffnil8uVJbf369bz++utsq+25KnGJiFThmWee4bDDDqOsrIxDDz2Ujz76iLKyMsrKyujatSvHHXdcpfI33ngjY8aMYcaMGdx9991s2rSp1hUk3n//fUpLSxkwYAC9e/dm7733pl+/fvTp04fFixdzwgknAHDppZcCcMstt9CrVy9KS0sZOXIkTZs25b333mPQoEEsXbqUQYMGsXz58vpYLTmlHyCLiFShY8eOLFy4EIC1a9fSunVrAL788ks++ugj9t1330rlyytUFBQUUFxczBtvvMEBBxxQq3nvueeelJWVVRo2YsQIRowYUWnYrbfeCsB1113HddddV2ncXnvtxdSpU2s1/8ZKV1wiIlXYb7/9mDdvHgcddBALFiygd+/eAEyZMoWjj05+yfuWChVfffUVc+fO3aoChdSdrrhERKowfvx4fvCDH3DVVVdx8803c9999/HjH/+YiRMnVvzIN9GoUaO48MILuf322znggAMoLi7OfGajW9Zj5DWURy8O1RWXiEgV3J1WrVoB0KZNGz777DM2btzIsmXL6Nat21bli4uLmTRpEo8++ijNmjVTO4UNQIlLRKQKZ555Jo888gilpaXcf//9DB06lOnTpzNgwIBK5corSDz99NP079+fwYMHM2rUKMxsm6wgkUuWz++I6dmzpy9YsCDXYch2oGTk0zmZ7/LCM3MyXyCvbh1tM7bTW4VmttDde2ZaXs+4REQS5OokBWB53d+xuF3QrUIREckrSlwiIpJXlLhERCSvKHGJiEheUeISEZG8osQlIiJ5RYlLRETyihKXiIjkFSUuERHJK0pcIiKSV3KSuMzsCjN7xcyWmtmDZlZoZq3M7Hkzez3+3y0XsYmISOOW9cRlZnsClwE93f1goAA4HRgJTHP3/YBpsV9ERKSSXN0qbALsZGZNgObAB8AQYHwcPx44ITehiYhIY5ZR4jKzPmZ2Tuxua2a1fjOau78P3Ay8C3wIfObuzwHF7v5hLPMh8J00sVxgZgvMbMGqVatqG4aIiOSpahOXmV0HjABGxUFNgftqO8P47GoI0AHYA9jZzM7K9PPufqe793T3nm3btq1tGCIikqcyueI6ETge+BLA3T8Adq3DPAcBb7v7KnffCDwO9AZWmlk7gPj/4zrMQ0REtlGZJK4NHl6T7ABmtnMd5/kucLiZNTczAwYCy4AngWGxzDDgiTrOR0REtkGZvAH5ETP7M1BkZj8BzgXuqu0M3X2emT0K/APYBCwC7gR2ifM6j5DcTqntPEREZNtVbeJy95vN7EhgHdAJuNbdn6/LTN39OuC6pMHfEK6+RERE0srkiouYqOqUrEREROpDtYnLzD4nPt8CdiTUKvzS3Vs0ZGAiIiKpZHKrsFINQjM7ATi0oQISERGpSo1bznD3ScCA+g9FRESkepncKjwpoXcHoCdbbh2KiIhkVSaVMwYndG8ClhNavhAREcm6TJ5xnZONQERERDKRNnGZ2a1UcUvQ3S9rkIhERESqUNUV14KsRSEiIpKhtInL3cenGyciIpIrmdQqbEt4rcmBQGH5cHdXlXgREcm6TH7HdT+h9fYOwPWEWoXzGzAmERGRtDJJXK3d/R5go7vPdPdzgcMbOC4REZGUMvkd18b4/0MzOxb4AGjfcCGJiIikV1V1+KbxDcW/NbOWwC+AW4EWwBVZik9ERKSSqq643jezJ4AHgXXuvhTon52wREREUqvqGVdnwm+5fgW8Z2Zjzeyw7IQlIiKSWtrE5e6r3f3P7t6f8BqTt4GxZvammY3JWoQiIiIJMnqtibt/ANwD/C/wOXB+QwYlIiKSTpWJy8wKzewUM3sceBMYCIwC9shGcCIiIsnSJi4zewB4FzgNeADYx92HufsUd9+c
rQAlM8uXL6e4uJjS0lKOOuooAC699FJKS0s599xz2by58iZ79913GTBgAP369eORRx4BYNOmTZx99tn06dOHG264IevLICKSiaquuJ4FOrr7ye7+qLt/na2gpHaOPPJIysrKeO6555g/fz4bNmygrKyMgw46iKeeeqpS2RtvvJExY8YwY8YM7r77bjZt2sSTTz5J586dmT17NrNnz+ajjz7K0ZKIiKRXVeWM8e7+eTaDkbqZMWMGffv25ZZbbuGtt96ia9euAHTv3p05c+ZUKls+vqCggOLiYt544w3mzJnDoEGDAOjfvz/z56tlLxFpfDKqnCGNX7t27fj3v//NjBkzmDp1Ku3atWPmzJkATJ8+nTVr1lQq36lTJ2bOnMlXX33F3LlzWbNmDWvXrqVFixYAtGzZcqvPiIg0Bkpc24hmzZqx884706RJE4477jhWrFjBwQcfTP/+/Vm3bh3FxcWVyo8aNYo777yTU045hQMOOIDi4mKKiopYt24dAOvWraOoqCgHSyIiUrWMEpeZ9TazM83sx+V/dZmpmRWZ2aNm9i8zW2ZmvcyslZk9b2avx/+71WUeuZBcQWLx4sWUlpZSWlpKhw4dGDt2bKXyn3/+OYMHD+aII45gwoQJACxatIguXbpQUlJSo3l//vmWu7ovvvgiHTt25Nprr2XGjBm0bt2aY489tlL54uJiJk2axKOPPkqzZs3o0KEDvXr1Ytq0aUC47XjIIYfUfCWIiDSwahOXmd0L3Az0AQ6Jfz3rON8/As+4+wFAN8JrU0YC09x9P2Ba7M87iRUkunfvTllZGWVlZXTt2pXjjjuuUtm77rqLM844g1mzZnH33XezYcMG9t13X+bOnUv79jVrx/iFF17ge9/7Hr1792aPPfbgkEMOobS0lIEDB7Ljjjty2GGh0ZNLL70UgKeffpr+/fszePBgRo0ahZkxePBgli5dSp8+fejVqxft2rWrn5UiIlKPMmkdvidwoLt7fczQzFoA/YDhAO6+AdhgZkOA0lhsPFBGeIFlXimvIHHSSSdxxRWhLeIvv/ySjz76iH333bdS2Tlz5nD77bdTUFBAt27deO211+jSpUut5nvMMcdwzDHHVBpWVla2Vblbb70VgGOPPXarq7CmTZty//3312r+IiLZkkniWgrsDnxYT/P8LrAK+D8z6wYsBH4OFLv7hwDu/qGZfSfVh83sAuACgL333rueQqof5RUkmjVrxpAhQxg4cCBdu3ZlypQpHH300VuVr3VliNEt6zPsmhn9We7mLSJCZs+42gCvmtmzZvZk+V8d5tkE6AH8r7v/B/AlNbgt6O53untPd+/Ztm3bOoRR/5IrSCxduhSAiRMnctJJJ21VXpUhRERqLpPENRo4Afgv4PcJf7W1Aljh7vNi/6OERLbSzNoBxP8f12EeOZGqgsTGjRtZtmwZ3bp126p8eWWIzZs3s3jxYjp16pTNcEVE8lK1icvdZ6b6q+0M3f0jwmtSyo/SA4FXgSeBYXHYMOCJ2s4jV5IrSBx22GFMnz6dAQMGVCpXXkHi/PPP5/7776dv376ce+65NGvWjPfee49BgwaxdOlSBg0axPLly3OwJCIijZdVV+fCzA4nvPm4M7AjUAB86e4taj1Ts+7A3XF6bwHnEJLoI8DehDYST3H3T6uaTs+ePX3BggW1DSN/6RlX1pWMfDon811eeGZO5gtoW+fA9rq9zWyhu2dcWz2Tyhm3AacDfyXUMPwxsF/twgvcfTGpq9QPrMt0sym3X+6czVpEJOcySVy4+xtmVhBbhf8/M/t7A8clIiKSUiaJ6ysz2xFYbGa/I1SL37lhwxIREUktk1qFZ8dylxCqru8F/KghgxIREUmn2isud3/HzHYC2rn79VmISUREJK1M2iocDCwGnon93ev4A2QREZFay/QHyIcCa6GiRmBJQwUkIiJSlUwS1yZ33z5/0CEiIo1ORo3smtmZQIGZ7QdcBqg6vIiI5EQmV1yXAgcB
3wAPAuuAyxswJhERkbQyqVX4FXBN/BMREcmptImrupqD7n58/YcjIiJStaquuHoB7xFuD84DLCsRiYiIVKGqxLU7cCRwBnAm8DTwoLu/ko3AREREUklbOcPdN7v7M+4+DDgceAMoM7NLsxadiIhIkiorZ5hZM+BYwlVXCfA/wOMNH5aIiEhqVVXOGA8cDEwBrnf3pVmLSkREJI2qrrjOJrQGvz9wmVlF3QwDvC5vQBYREamttInL3TP5cbKIiEhWKTmJiEheUeISEZG8osQlIiJ5RYlLRETyihKXiIjkFSUuERHJK0pcIiKSV3KWuMyswMwWmdlTsb+VmT1vZq/H/7vlKjYREWm8cnnF9XNgWUL/SGCau+8HTIv9IiIileQkcZlZe0LjvXcnDB4CjI/d44ETshyWiIjkgVxdcY0Frga+TRhW7O4fAsT/30n1QTO7wMwWmNmCVatWNXigIiLSuGQ9cZnZccDH7r6wNp939zvdvae792zbtm09RyciIo1dle/jaiBHAMeb2TFAIdDCzO4DVppZO3f/0MzaAR/nIDYREWnksn7F5e6j3L29u5cApwPT3f0s4ElgWCw2DHgi27GJiEjj15h+x3UDcKSZvQ4cGftFREQqycWtwgruXgaUxe7VwMBcxiMiIo1fY7riEhERqZYSl4iI5BUlLhERyStKXCIikleUuEREJK8ocYmISF5R4hIRkbyixCUiInlFiUtERPKKEpeIiOQVJS4REckrSlwiIpJXlLhERCSvKHGJiEheUeISEZG8osQlIiJ5RYlLRETyihKXiIjkFSUuERHJK0pcIiKSV5S4REQkryhxiYhIXlHiEhGRvKLEJSIieUWJS0RE8krWE5eZ7WVmM8xsmZm9YmY/j8NbmdnzZvZ6/L9btmMTEZHGLxdXXJuAX7h7Z+Bw4GdmdiAwEpjm7vsB02K/iIhIJVlPXO7+obv/I3Z/DiwD9gSGAONjsfHACdmOTUREGr+cPuMysxLgP4B5QLG7fwghuQHfyWFoIiLSSOUscZnZLsBjwOXuvq4Gn7vAzBaY2YJVq1Y1XIAiItIo5SRxmVlTQtK6390fj4NXmlm7OL4d8HGqz7r7ne7e0917tm3bNjsBi4hIo5GLWoUG3AMsc/c/JIx6EhgWu4cBT2Q7NhERafya5GCeRwBnAy+b2eI47P8BNwCPmNl5wLvAKTmITUREGrmsJy53nw1YmtEDsxmLiIjkH7WcISIieUWJS0RE8ooSl4iI5BUlLhERyStKXCIikleUuEREJK8ocYmISF5R4hIRkbyixCUiInlFiUtERPKKEpeIiOQVJS4REckrSlwiIpJXlLhERCSvKHGJiEheUeISEZG8osQlIiJ5RYlLRETyihKXiIjkFSUuERHJK0pcIiKSV5S4REQkryhxiYhIXlHiEhGRvKLEJSIieUWJS0RE8kqjS1xmdrSZvWZmb5jZyFzHIyIijUujSlxmVgDcDvwQOBA4w8wOzG1UIiLSmDSqxAUcCrzh7m+5+wbgIWBIjmMSEZFGpEmuA0iyJ/BeQv8K4LDEAmZ2AXBB7P3CzF7LUmyNhkEb4JOczPx6y8lst1fa1tuX7Xh771OTwo0tcaVac16px/1O4M7shNM4mdkCd++Z6zik4Wlbb1+0vTPT2G4VrgD2SuhvD3yQo1hERKQRamyJaz6wn5l1MLMdgdOBJ3Mck4iINCKN6lahu28ys0uAZ4EC4C/u/kqOw2qMtutbpdsZbevti7Z3Bszdqy8lIiLSSDS2W4UiIiJVUuISEZG8kvPEZWYnmpmb2QFVlCkys4trOf2/mVlRNWWWm1mbNMNfNrPF8f+QhHFf1CaeFPMYZ2Ynpxn+dpz3P8ysV33ML5vMrNTMnordx5c34VWTbW5mJWZ2Zg3ntcDMRldTXtu8kYjbrncG5Uab2ZWx+9dmNqjho6uY9xdJ/cPN7LZ6mnba73gctz5+J141swlm1rSa6WW0PvNZzhMXcAYwm1CDcCux
GagioEaJy4Id3P0Yd19bh/j6u3t34GTgf+owndq4Ks57JPDn5JFx3WRFXefl7k+6+w2xtybbvASoNnElfHYHd+/p7qPrEK62eXbnVQrU6EDr7te6+9SGCSd7zKwJ1X/H34zfiS6EnwidWs1kS6nh+sw3OU1cZrYLcARwHgkHsXjGMMPMHgBeBm4AOsazjptimavMbL6ZLTGz6+OwEjNbZmZ3AP8A9ko8szazSWa20MxeiS1w1EQLYE2KZTAzu8nMlsYz9NMyGH5bPHt6GvhOBvOeBewbP7/czK41s9nAKWZ2lJnNiWfof43rFDO7Ic5jiZndHIedEuP5p5nNisMqnTma2VNmVhq7v4hntvOAXmZ2lpm9FLfDn1Md2Cw0kvyvGN9JCcOHx+XeBRgY1+dIM5tqZsVxm//DzNYAXwKLgI7A08CRcZ5PWria+tTMVprZIjM73cyWAZcDfQnb/BMzuzvOd4qZrTGzr83svRqeiWqbZ7bNK83XzHa1cOXYNI5vEZehqZldllD2ITMrAS4Crojz6Gtm+5jZtFhmmpntnWKeFVetZnaImf09LuNLZrZrBuu33phZWzN7zMLxaL6ZHRGHHxrjWhT/d4rDh8ftNhl4jnB86xuX/4p083H3zcBLhBaGMLPBZjYvTr98Pyph6/WZMr685u45+wPOAu6J3X8HesTuUsLBq0PsLwGWJnzuKEK1USMk36eAfrHct8DhCWWXA21id6v4fydgKdA6uUxSfMsJiXMp8BVwXMK4L+L/HwHPE6rvFwPvAu2qGH5SwvA9gLXAySnmPa58OHAKMC8hpqtjdxvCAW7n2D8CuBZoBbzGllqjRfH/y8CeScOGA7clzPcpoDR2O3Bq7O4MTAaaxv47gB8nxVxIaLJrv7htHgGeSpxP3Ob3xvF/B34N/D5u800Jy3xgXO+lMabybf4L4P/isLOA9+M2vzhhXp8Ad8fuiYSkVr7NS7TN63Wbp5vv/wEnxO4LgN/H7g+AZkllRwNXJkxzMjAsdp8LTEouV76ugB2Bt4BD4vAWQJMGOFZtBhYn/L1bvg6BB4A+sXtvYFlyLMAg4LGE9b+CLcejUuJ3N8V8S4jHPsL+NQPoGvt3S1jv5yes4+T1mTK+fP7L9e+4zgDGxu6HYv8/Yv9L7v52ms8dFf8Wxf5dCAfLd4F33H1ums9dZmYnxu694mdWVxNjf3f/xMw6AtPMrMzdE+939wEe9HA2tNLMZgKHVDG8X8LwD8xsehXzvsnMfgmsIlyVlns4/j+ccIB/0cwg7MRzgHXA18Dd8Qz/qVj+RWCcmT0CPF7NckPYWR+L3QOB7wHz47x2Aj5OKn8A8La7vw5gZvexpV3JcmcAkwi/1SsBLiGcRU4G3gGuMrM94rBE5dv8VLa05daMkLgceBU4JsUyHEW4cjuHsM13JySCqmibB5ls83TzvRu4mrCtzwF+EocvAe43s0lxXCq92HK1fi/wuyri7QR86O7zAdx9XVULVwfrPdyuA8JVE1DeNNMg4MC4jgBaxKu+lsB4M9uP8B1NfDb1vLt/muG8O5rZYsLx6lF3XxKHtwceNrN2hO9BuuNlyvjc/fMM59/o5CxxmVlrYABwsJk54WzUzezqWOTLqj4O/Le7V3oGEC+TU34u3goZBPRy96/MrIxwBpMRd3/TzFYSDhqJB9V0LVNW1WJlpj+eu8rdH00xvHwZjbADnLHVzM0OJRx4TickhwHufpGZHQYcCyw2s+6Eq5zEW8aJ6+TreLAtn9d4dx9VTcxVLVshYZsPBD4DNgAbE+a5DBhFSEATgcQdy4D/Jrzy5n/cfXpczguB9almFrf5jkA/d1+rbV7/29xDowGp5vuihVv33wcK3H1p/MixhER+PPArMzso7ZpImE0V46ya8dmwA+G4Uul7aGa3AjPc/cR4bCpLGF3V8S3Zm+7ePSaoMjM73t2fBG4F/uDuT8bv+uiaxJfPcvmM62Rggrvv4+4l7r4X4YyhT4qynwOJ
962fBc61Lff29zSz6p4btATWxKR1AOHMNWNx+h0IVwWJZgGnmVmBmbUl7JQvVTP89Di8HdC/JnEkmQscYWblz0Kam9n+cb20dPe/EW6TdY/jO7r7PHe/lnDVshfh6qO7me1gZnsRXi2TyjTg5PL1bGatzCy5Red/AR3ilQqEq6tEHYEJhAR1TNzmTtg2AM3d/WV3v5Fw5V3Elm3/LOG20VxgaNzmhxNuvW1IE3NLwhXC8IRt3jxN2a1om1e/zdPNN5oAPEi4bYiZ7QDs5e4zCFdjRYS7Jcn799/Z8sx7KKEiTzr/AvYws0PiPHa1UOEhm54jJGxiDN1jZ0vCHQEItwfTSV7+lNz9Q0KlnfITicTpD6tieuniy1u5vFV4BuGhZKLHCLVrHk4c6O6rzexFM1sKTHH3q8ysMzAnXv5+QXjesZn0ngEuMrMlhHvy6W4nJpthZpsJl/kj3X1l0viJhFsb/yQchK9294/MrKrhAwjPHv4NzMwwjq24+6p4y+JBM2sWB/+S8MV9wswKCWek5Q98b4q3LYxwUPpnHP42W57rlN+qTZ7Xq/EW1nPxALQR+BkJB3V3/9pCpZenzewTwgHn4ITJ7A/cCPwN+KuZvQ+8QrjFCCHpLSVsx1eB6cB4wk54E+FZxlmEh9OnEw7AV8ZlTuUZwoHt+vi3iXB7sjra5mS2zQnbJtV8Ae4HfktIXhDuqtxnZi1j2VvilfBk4FELPz24FLgM+IuZXUW4ZXpOFetjg4VKMLea2U6Eq+9BhGNCtlwG3B6PLU0IJyoXEW5xjjez/yR8l9NZAmwys38C49z9lirKTgJGm1lfwhVW+X40l3CSBeG2e/L6TBVf3lKTTyLSICzU+hvi7mfnOhbZtuS6coaIbIPi850fkrrCjEid6IpLRETySmNoOUNERCRjSlwiIpJXlLhERCSvKHHJNslC6/P3JvQ3MbNVFluQ39ZZFS2EW2grb5WFtuz+ZVW0j1eHeT+V0L1NN/gq2afEJduqLwmtsuwU+49ky481syoHP4iF6lsIfzg2YXQEcE38IXIu4hCpMSUu2ZZNITQxBOEH7+U/hMXMdjazv1hoLXtR/LFm+RsGXrDQ8vo/yq8WzKydmc2KVylL4w9AsYT3NJnZyWY2LnaPM7M/mNkM4EYz62hmz1h4O8ELsSWP8nL/a+FtCG+Z2fdjXMvKpxXLpWsRfrmZXR+Hv2xmB1iKFsLTrSB3Xw28QWgMGEvRGnz8G2dbWr2/IpYtM7OesbuNmS1PnHZN4hCpCf2OS7ZlDwHXxttWXYG/EF59AnANMN3dz7XwotGXzGwqoYmoI2MrIPsRkl1PQosuz7r7GAuv9sik6aj9gUHuvtnMpgEXufvrFtoOvIPQmgaEVr4HENrvm0y4Cjqf0Lhtd0JL4r+M0/rSzEYA/0loWR/gE3fvYeFlq1e6+/lm9idCa/Y3VxWghVeGFAJLLLRGcxpwhLtvtPB6oKGE1k32dPeD42eKMlh23H15pnGI1IQSl2yz3H1JPOs/g9DMVKKjgOMtvlGXcPDem/DajdtiwthMSD4A8wnNEDUlvGZjcQYh/DUmrV0It8v+alta6G6WUG6yu7uZvQysdPeXAczsFUITVe1J3SJ8ufJW3xeS8A60apxmZv0Jrav/JCbqdK3BTwa+G39U/DSh7TuRnFHikm3dk8DNhGctrROGG/Ajd38tsbCZjQZWAt0It9K/BnD3WWbWj3Dr8V4zu8ndJ1C5ZfLklufLWwDfAVib+FqMJN/E/98mdJf3NyEk0JQtwid9fjOZ79MPu/slZtaL0LbkFKpoDd7MugE/ILRVeCqhwePEVuYzbnVfpK70jEu2dX8Bfl1+FZPgWeBSi5cWZvYfcXhLwvudvgXOJjQMi4VW0T9297uAe4AesfxKM+tsoRHaE0khviPqbTM7JU7LYiLIVMoW4av5TKYtjs8hvPPq56RpDd7CG8R3cPfHgF+xZdmXE67QILztodZxiNSEEpds09x9
hbv/McWo3xBaf19ioUX638ThdwDDzGwu4TZh+VVTKeF9VosIbzoun+ZIwssTpwMfVhHKUOA8Cy2AvwIMqcEyrCK8FuNBCy18z2VLi/rpTAZOzLBSxI2EFtjfIzxLey7O53lCpY09Ce+BWkx483D5FdnNwE/N7O+ENzPXNQ6RjKitQhERySu64hIRkbyixCUiInlFiUtERPKKEpeIiOQVJS4REckrSlwiIpJXlLhERCSv/H+ttDP8xRaCigAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Calculate the mean values for 'bnp' only\n",
    "mean_bnp_values = hf_ce_lab.groupby(['dx', 'label_ce'])['value'].mean().reset_index()\n",
    "\n",
    "# Create a bar plot for 'bnp'\n",
    "plt.figure(figsize=(10, 6))\n",
    "ax = mean_bnp_values.pivot(index='label_ce', columns='dx', values='value').plot(kind='bar', rot=0)\n",
    "plt.xlabel('Measurement Result')\n",
    "plt.ylabel('Mean Value')\n",
    "plt.title('Mean Measurements Value Stratified by Dx and Measurements')\n",
    "plt.legend(title='dx')\n",
    "\n",
    "# Add direct labels (values) on top of the bars\n",
    "for i, bar in enumerate(ax.patches):\n",
    "    height = bar.get_height()\n",
    "    plt.text(\n",
    "        bar.get_x() + bar.get_width() / 2, height + 0.1, f'{mean_bnp_values.iloc[i, 2]:.2f}',\n",
    "        ha='center', va='bottom', fontsize=8, color='black'\n",
    "    )\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 219,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Figure size 864x432 with 0 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAsIUlEQVR4nO3deXwV5dn/8c+VsC8GQbQICKhUFhWUWBaBnEeLiAsgxmqsLQg8PD8poK0V0+qj1mpreYrWFlurQgErbliUxR0NqBQqyiKKcQM0CmFTJGjYcv3+mAkeQoCTQHIm5Pt+vc7rnJm5Z+aas8x17nvumTF3R0REJGpSkh2AiIhIaZSgREQkkpSgREQkkpSgREQkkpSgREQkkpSgREQkkpSgREphZpeY2WdmVmBmZ0QgntvM7J/JjuNQmNmPzezFuOGzzezD8D0eaGbPmdngci47x8yG72dalX/vqislqCOYma02s2/DHUC+mf3DzBokO65iEd9x/BEY5e4N3H1JyYkWGGVmy83sGzNbF+4kr0hCrGVmZj3NbIGZbTGzzWb2hpmdFU4bYmavH+LyW5uZm1mN4nHu/oi7nxdX7HZgQvgeP+3u/dx9yqGst6LF/aa2mtlX4Xv4/8xM+9IKoDf1yHexuzcAzgTOAm4uy8zhjrg6fk9aAe8eYPqfgeuA64EmQHOC9/b8Co+sDEr7/MzsKGA28BegMUHsvwG2l2G5qYchvIO9x1F1sbs3JIj/LuBGYGJyQzpCubseR+gDWA38MG74/4DZ4etuwALgK2AZEIsrlwPcCbwBfAucDHQEXgI2A/nAr8OyKUA28DGwCXgCaBxOaw04MBj4FNgI3BROOx/YAewECoBl4firgZXAVuAT4H9KbNNYYC3wBTA8XP7J4bTaBDWfT8MY7wfq7ue9SSFIKGuA9cBUIC1cRkG43G3Ax6XM+31gN5B+kPc/jWDHtRb4HLgDSA2nDQFeD+P9ElgF9Iubtw0wL3wfXgImAP+Mm16mz69EXOnAV/uJuT1QGG5fQXE5YDLwN+DZ8H35IXAhsAT4GvgMuC1uOZ+G72FB+OhevM3h9I+BojC+gvB9zwGGxy1jaPhd+BJ4AWgVN60P8D6wJXxv5sXPW2KbbgOmA4+H7+fbQKdw2g3AUyXK/wX4UyK/qXDcD8JtORWoBSwFRofTUsPP4ZZk7w+q4iPpAehRgR9u3I8JaEnwb/W3BP+YNwEXEOyo+4TDTcOyOeEOpiNQA2hIsJO9HqgTDncNy14HLARahDuZvwOPhtNahzupB4G6QCeCf+ntw+m3EbfTDcddCJwEGJABfAOcGU47H1gXxlUPeJi9E9SfgJkEtYKGwCzg9/t5b4YCHwEnAg2AfwEPx03fs9xS5v1/wOoE3v+nw/ejPnAs8B/ChEuws94J/He4E7uGIOlaOP3fwN3he9qbYMf6z3BaWT+/miXiOiosPwXoBxxdYvoQwkQSN24yQTI4O1xnHSAGnBYOn07wp2Bgic++xv6Wy75/oHIIkwwwMPx82ofbcDOwIJx2DEFSzARqAj8HdnHgBLUzrvwvCf4Q1ASaESTcRmHZGgR/WLoc7DdVYvynwDXh61MJkmp74CaC30dqsvcHVfGR9AD0qMAPN/gxFRD8y14D/JUgUdxI3M44LPsCMDh8nQPcHjctC1iyn3WsBM6NG24W7gxqxO2kWsRN/w9wRfj6NkokqFKW/zRwbfh6EnEJh6Bm5+GzhTuak+KmdwdW7We5c4GRccOnFMcdDh8oQd0MLCwxLi98nwsJmn6OI0jGdePKZAGvhq+HAB/FTasXrvN7wAnhDrd+3PRpfJegyvT57Wcb2hMknbxwXTOB4+JiKy1BTT3IMv8E3BO+Lv7sy5ugngOGxU1LIfiz0gr4afz7H372eRw4QS0ssay1QK+4df13+Poi4L2D/KZKS1ALCVsHwuHrCWp4XwJty/sbru6P6nhsoboZ6O6N3L2Vu490
928JfuSXhQd5vzKzr4CeBMml2Gdxr1sSNMmUphUwI245Kwmah46LK7Mu7vU3BDWWUplZPzNbGB64/4qglnBMOPn4EnHFv25KsJN/Ky6W58PxpTmeIGkXW0OQVI8rvfheNrH3e4W7twjjrE2ww2xF8A99bVw8fyeoSRVbFzf/N+HLBmFsX7r7thLxFSvr57cPd1/p7kPCuE8N1/mnA81Tcplm1tXMXjWzDWa2haBmeUzps5ZZK+DeuO3bTPC+NqfE98CDjHDA7S1RvoggoR0fjpoCXBW+voqgZl5WzcMYi00hSNLPuvuH5VieoE4S1dVnBP/AG8U96rv7XXFlvET5kw6wrH4lllXH3T9PII74dWBmtYGnCI7LHOfujQiOeVhYZC1BU2KxlnGvNxIcz+gYF0eaBx1ESvMFwU6wWHGtJT+BuF8BWphZ+gHKfEZQgzomLp6j3L1jAstfCxxtZvVLxBe/7LJ8fgfk7u8T1JBOPci8JcdPI6h5tXT3NIJjfrafsmX1GUFzaPw21nX3BQTvz57P3syMvb8LpYkvn0LwPfoiHPU0cLqZnUpQg3qkLIGGvR+bExxTLPZXgo4ofc2sZ1mWJ99Rgqqe/glcbGZ9zSzVzOqYWczMWuyn/Gzge2Z2nZnVNrOGZtY1nHY/cKeZtQIws6ZmNiDBOPKB1nG9zGoR1EA2ALvMrB8Q3y35CeBqM2tvZvWAW4onhP+KHwTuMbNjw1iam1nf/az7UeDnZtYm7Hr/O+Bxd991sKDdPZegNvSYmfUxs7phr7YecWXWAi8C483sKDNLMbOTzCwjgeWvARYDvzGzWuEO7uK4ImX9/PZiZu3M7Pri8mbWkqD5cWFYJJ8gAdc6yKIaApvdvdDMfgBcGTdtA0HHgRMTiakU9wO/MrOOYYxpZnZZOG0O0NHMBoXd2McQNI0eSJe48tcR/HlYCODuhQSdKKYB/3H3TxMJMPxcLwIeI2h+fScc/xOgC0GT5hhgikXo9I6qRAmqGnL3z4ABwK8JdiSfEfRmKvX74O5bCQ7EX0zQLPUh8F/h5HsJ/kW/aGZbCX70XUtbTimeDJ83mdnb4XrGECSiLwl2eDPj4niOoHv3qwQH0P8dTiruHn1jOH6hmX0NvExwbKk0kwiacuYTHDAvBEYnGDfAz8JY7iZo2skj6IByOcEBcwiOldQC3gu3ZzolmgYP4EqC93EzcCtBL0Og7J9fKbaGy15kZtsIPrMVBMdNIKghvgusM7ONB1jOSOD28HO/heBzK47xG8KehGEzXbcEYyuefwbwB4I/AV+H8fULp20ELiPo4r0JaEvQU+5AniH4bL4EfgIMcvedcdOnEHT4SKR5b1a4zZ8RdIK4m6D3KWZ2AkFT6U/dvcDdpxH82bgngeVKCcU9hkSqHDNrT7Djqp1IzUdkf8LE8j7wPXf/OtnxSEA1KKlSLLgEUS0zO5rgH/YsJSc5FGET8y+Ax5ScoqXCEpSZTTKz9Wa2Im5cYzN7yYLrb70U7mSKp/3KzD4ys9wDHDcQ+R+CZq2PCXoLXpPccKQqCzuifE3QhH1rksOREiqsic/MehOcgzPV3U8Nx40jOKh6l5llE5wgeKOZdSA4aP0Dgq6fLwPfd/fdFRKciIhEXoXVoNx9PnufFwDBgd3ii0FOIThbvHj8Y+6+3d1XERzo/kFFxSYiItFX4+BFDqvjwu63uPva4u7ABOcQLIwrlxeO24eZjQBGANSvX79Lu3btKjBcERGpaG+99dZGd9/npPrKTlD7Y6WMK7Xt0d0fAB4ASE9P98WLF1dkXCIiUsHMbE1p4yu7F1++mTUDCJ/Xh+Pz2PtM8PizvEVEpBqq7AQ1k+DWC4TPz8SNvyK8SkEbghPv/lPJsYmISIRUWBOfmT1KcDn+Y8wsj6AL513AE2Y2jOBs+8sA3P1dM3uC4Iz7XcDP1INPRKR6q7AE5e5Z+5l07n7K30lwaRQREUnAzp07ycvLo7CwMNmhJKROnTq0aNGCmjVr
JlQ+Kp0kRESkjPLy8mjYsCGtW7cmuKh7dLk7mzZtIi8vjzZt2iQ0jy51JCJSRRUWFtKkSZPIJycAM6NJkyZlqu0pQYmIVGFVITkVK2usSlAiIhJJSlAiInJQQ4YMYfr06ZW6TiUoERE57HbtOvS74ChBiYgcYX7729/Srl07+vTpQ1ZWFn/84x/5+OOPOf/88+nSpQu9evXi/fffB4Ka0ZgxY+jRowcnnnjinlqSuzNq1Cg6dOjAhRdeyPr16/cs/6233iIjI4MuXbrQt29f1q5dC0AsFuPXv/41GRkZ3HvvvYe8HepmLiJyBFm8eDFPPfUUS5YsYdeuXZx55pl06dKFESNGcP/999O2bVsWLVrEyJEjeeWVVwBYu3Ytr7/+Ou+//z79+/cnMzOTGTNmkJubyzvvvEN+fj4dOnRg6NCh7Ny5k9GjR/PMM8/QtGlTHn/8cW666SYmTZoEwFdffcW8efMOy7YoQYmIHEFef/11BgwYQN26dQG4+OKLKSwsZMGCBVx22WV7ym3fvn3P64EDB5KSkkKHDh3Iz88HYP78+WRlZZGamsrxxx/POeecA0Bubi4rVqygT58+AOzevZtmzZrtWdbll19+2LZFCUpE5AhS2k1oi4qKaNSoEUuXLi11ntq1a5c6f2ndwt2djh078u9//7vUZdWvX7+MEe+fjkGJiBxBevbsyaxZsygsLKSgoIA5c+ZQr1492rRpw5NPPgkESWbZsmUHXE7v3r157LHH2L17N2vXruXVV18F4JRTTmHDhg17EtTOnTt59913K2RblKBERI4gZ511Fv3796dTp04MGjSI9PR00tLSeOSRR5g4cSKdOnWiY8eOPPPMMwdcziWXXELbtm057bTTuOaaa8jIyACgVq1aTJ8+nRtvvJFOnTrRuXNnFixYUCHbYqVVB6sK3bBQRKqzlStX0r59+33GFxQU0KBBA7755ht69+7NAw88wJlnnpmECPdVWsxm9pa7p5csq2NQIiJHmBEjRvDee+9RWFjI4MGDI5OcykoJSkTkCDNt2rRkh3BY6BiUiIhEkhKUiIhEkhKUiIhEkhKUiIhEkjpJiIgcIVpnzzmsy1t914UHLZOamsppp522Z/jpp5+mdevWh2X9SlAiIlJudevW3e8llA6VmvhERCSSVIMSEZFy+/bbb+ncuTMAbdq0YcaMGYdt2UpQIiJSbmriExGRakcJSkREIklNfCIiR4hEuoVXJapBiYhIuRUUFFTYspWgREQkkpSgREQkkpSgREQkkpKSoMzs52b2rpmtMLNHzayOmTU2s5fM7MPw+ehkxCYiItFQ6QnKzJoDY4B0dz8VSAWuALKBue7eFpgbDouISDWVrCa+GkBdM6sB1AO+AAYAU8LpU4CByQlNRESioNLPg3L3z83sj8CnwLfAi+7+opkd5+5rwzJrzezY0uY3sxHACIATTjihssIWEYm+29IO8/K2HLSImXHVVVfx8MMPA7Br1y6aNWtG165dmT179iGtPhlNfEcT1JbaAMcD9c3sqkTnd/cH3D3d3dObNm1aUWGKiEgC6tevz4oVK/j2228BeOmll2jevPlhWXYymvh+CKxy9w3uvhP4F9ADyDezZgDh8/okxCYiImXUr18/5swJbpb46KOPkpWVdViWm4wE9SnQzczqmZkB5wIrgZnA4LDMYOCZJMQmIiJldMUVV/DYY49RWFjI8uXL6dq162FZbjKOQS0ys+nA28AuYAnwANAAeMLMhhEkscsqOzYRESm7008/ndWrV/Poo49ywQUXHLblJuVise5+K3BridHbCWpTIiJSxfTv359f/vKX5OTksGnTpsOyTF3NXEREDtnQoUNJS0vjtNNOIycn57AsUwlKRORIkUC38IrSokULrr322sO6TCUoEREpt9JutxGLxYjFYoe8bF0sVkREIkkJSkREIkkJSkSkCnP3ZIeQsLLGqgQlIlJF1alTh02bNlWJJOXubNq0iTp16iQ8jzpJiIhU
US1atCAvL48NGzYkO5SE1KlThxYtWiRcXglKRKSMpk6dypQpU9i9ezd33nknN910EwBr1qzh2muv5brrrttTdsqUKUycOJHCwkKGDBnCyJEjmTVrFnfeeSdmRmZmJtdffz1r1qzhxz/+MU2aNGHGjBns2rWLa665hokTJ+43jpo1a9KmTZuK3tyksapQNdyf9PR0X7x4cbLDEJFq5PPPP+eWW24pNXEMGDCA8ePHc/LJJ+8Zt3PnTmrWrMnu3bs588wzWbZsGZ9++inNmzcnJSWFWCzGzJkzeeihh+jevTs5OTn07duXRYsWccYZZ9C9e/fK3LykMLO33D295HgdgxIRKYMXXniB3bt3c+655zJ69Gh2794NwLZt21i3bt1eyQmCWg7Ajh07aN++PRDcyy41NRUzIzU1lZSUFOrVq0dhYSHbtm0jJSWFpUuXVovkdCBKUCIiZZCfn8+OHTuYO3cu9erV45lnghsvPPfcc5x//vmlznP77bfTtm1bunTpstf45557jpNPPpmGDRuSlZXF1KlTMTNycnLIyspizJgx3HHHHRW+TVGlBCUiUgZpaWlkZGQAcM4557By5UoAZsyYwaBBg0qd55ZbbuHjjz/mySef3HMh1U8++YRx48Zxzz33ANCoUSMmT55MdnY2H3zwAbm5uWRmZlJUVERubm4lbFn0KEGJiJRBjx49WL58OQBLly6lTZs27Ny5k5UrV9KpU6d9ym/fvh2AWrVqUa9ePWrXrs3WrVsZMmQIEydOpH79+nuVnzBhAqNGjdrT1JeSklLq5YSqAyUoEZEy6Ny5M3Xr1iUWi/Hmm2+SmZnJK6+8wjnnnLNXudGjRwPw+9//nlgsxtlnn83ll19OgwYNmDBhAqtWrWLo0KHEYjFWrVoFwJYtW8jLy6Njx45ceumljB07lkWLFtG5c+fK3sxIUC8+ERFJKvXiExGRKkUJSkREIkkJSkREIkkJSkREIknX4hMRiXdbWrIjKJ8k3u69oqgGJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikZRQgrLAVWZ2Szh8gpn9oGJDExGR6izRGtRfge5AVji8FbivQiISEREh8QTV1d1/BhQCuPuXQK3yrtTMGpnZdDN738xWmll3M2tsZi+Z2Yfh89HlXb6IiFR9iSaonWaWCjiAmTUFig5hvfcCz7t7O6ATsBLIBua6e1tgbjhcba1evZrjjjuOWCzGeeedB0BaWhqxWIxYLMbmzZv3Kv/ggw/SrVs3unXrxrRp0wBYt24dsViMjIwMhg4dCsCaNWvo2bMnAwYMoKioiB07djBs2LDK3TgRkQQkmqD+DMwAjjWzO4HXgd+VZ4VmdhTQG5gI4O473P0rYAAwJSw2BRhYnuUfSfr06UNOTg4vvvgiAKeddho5OTnk5OTQuHHjfcouXLiQ1157jfHjxwMwbdo0hg4dyrx580hNTWXZsmVMnz6dcePG0bVrV5YsWcJDDz3E8OHDK33bREQOJqEE5e6PAGOB3wNrgYHu/mQ513kisAH4h5ktMbOHzKw+cJy7rw3XtxY4trSZzWyEmS02s8UbNmwoZwhVw6uvvkqvXr245557AFi5ciW9evUiOzsbd9+rbOvWrQGoUaMGqampALRr146tW7cCsHXrVho1akS9evUoLCxk27ZtpKSksHTpUrp37155GyUikqBEe/E1BtYDjwLTgHwzq1nOddYAzgT+5u5nANsoQ3Oeuz/g7ununt60adNyhhB9zZo144MPPuDVV1/l5ZdfZvny5Xz44YfMnz+fL7/8klmzZpU63/3338/AgQMBSE9PZ9KkSbRv355atWrRqlUrsrKymDp1KmZGTk4OWVlZjBkzhjvuuKMSt05E5OASbeJ7m6DW8wHwYfh6lZm9bWZdyrjOPCDP3ReFw9MJEla+mTUDCJ/Xl3G5R5TatWtTv359atSowUUXXcSKFSto3LgxZsbAgQNZ
sWLFPvMsWrSIZ599lhtvvBGA8ePHk52dzcqVK0lLS2P+/Pk0atSIyZMnk52dzQcffEBubi6ZmZkUFRWRm5tb2ZspIrJfiSao54EL3P0Yd28C9AOeAEYSdEFPmLuvAz4zs1PCUecC7wEzgcHhuMHAM2VZ7pGmuGkO4I033qB58+bs3r17z/BJJ520V/nPP/+c66+/nilTpuxp4nP3PceqmjRpwpYt393QbMKECYwaNWpPU19KSgoFBQUVvVkiIglLNEGlu/sLxQPu/iLQ290XArXLsd7RwCNmthzoTNDh4i6gj5l9CPQJh6ut1157jS5dutCjRw+OP/540tLSOOuss+jVqxefffYZmZmZAIwePRqA22+/nfz8fAYNGkQsFuPbb79l5MiR3H777WRkZLB8+XL69u0LwJYtW8jLy6Njx45ceumljB07lkWLFtG5c+dkba6IyD6s5MH2UguZvUjQ9fuxcNTlBEnkfOBNdz+zwiI8gPT0dF+8eHEyVi0iRyrd8r3Smdlb7p5ecnyiNagrgRbA0wRNbyeE41KBHx2mGEVERPaokUghd99I0CxXmo8OXzgiIiKBhBJUeOWIsUBHoE7xeHc/p4LiEhGRai7RJr5HgPeBNsBvgNXAmxUUk4iISGI1KKCJu080s2vdfR4wz8zmVWRgR7rW2XOSHUK5rL7rwmSHICLVRKIJamf4vNbMLgS+IOg0ISIiUiESTVB3mFkacD3wF+Ao4LqKCkpERCTRBPWlu28BtgD/BWBmZ1dYVCIiUu0l2kniLwmOExEROSwOWIMys+5AD6Cpmf0ibtJRBCfpioiIVIiDNfHVAhqE5RrGjf8ayKyooERERA6YoOK6lE929zWVFJOIiEjCnSRqm9kDQOv4eXQlCRERqSiJdpJ4ElgC3AzcEPcQKdXdd99Nz5499ww/9dRTtGzZcp9yd911F7FYjFgsRv369dm8eTOzZs2iW7dudO/enfHjxwOwZs0aevbsyYABAygqKmLHjh0MGzas0rZHRCpfojWoXe7+twqNRI4Y27dvZ9myZXuNmz59eqkJKjs7m+zsbDZu3EhmZiaNGzemU6dOvPHGG6SkpBCLxRg+fDjTp09n3Lhx5OTksGTJEhYtWsTw4cMra5NEJAkSrUHNMrORZtbMzBoXPyo0MqmyHnroIQYPHrxneM6cOfTp04eUlP1/3WbOnEn//v0BOOGEE0hNTcXMSE1NJSUlhXr16lFYWLjnDsBLly6le/fuFb4tIpI8iSaowQRNeguAt8KH7hQo+9i5cyfz5s3jnHO+Ozw5ZcoUrrrqqgPON2PGDC655JK9xj333HOcfPLJNGzYkKysLKZOnYqZkZOTQ1ZWFmPGjOGOO+6okO0QkeRL9H5QbSo6EDkyPPzww1x55ZV7hl955RV69OhBrVq19jtPQUEBGzdupE2b775mn3zyCePGjWP27NkANGrUiMmTJ1NQUMANN9xA3bp1yczMJCcnh9zcXE455ZSK2ygRSYqEalBmVs/Mbg578mFmbc3soooNTaqi3Nxc/va3v3H++efz7rvv8s477zBz5sw9wzfffPM+8zz77LP069dvz/DWrVsZMmQIEydOpH79+nuVnTBhAqNGjdrT1JeSkkJBQUGFb5eIVL5Em/j+AewguKoEQB6gthXZxx/+8AdeeOEFnn/+eTp27Mi1117LK6+8sme4uElu9OjvbtA8Y8YMBg0atGd4woQJrFq1iqFDhxKLxVi1ahUAW7ZsIS8vj44dO3LppZcyduxYFi1aROfOnSt1G0Wkcpi7H7yQ2WJ3TzezJe5+Rjhumbt3qvAIDyA9Pd0XL66ah8J0PyiRiLotLdkRlM9tW5IdQbmZ2Vvunl5yfKI1qB1mVhfwcGEnAdsPY3wiIiJ7SfQ8qFuB54GWZvYIcDYwpKKCEhERSbQX30tm9jbQDTDgWnffWKGRiYhItZZoL75LCK4mMcfdZwO7zGxghUYmIiLVWsJNfO4+o3jA3b8ys1uB
pyskKokuHUAWkUqSaCeJ0solmtxERETKLNEEtdjM7jazk8zsRDO7h+ByRyIiIhUi0QQ1muBE3ceBJ4BvgZ9VVFAiIiIHbaYzs1TgGXf/YSXEIyIiAiRQg3L33cA3ZlZFj46LiEhVlGhHh0LgHTN7CdhWPNLdx5R3xWHNbDHwubtfFN5f6nGC28qvBn7k7l+Wd/kiIlK1JXoMag7wv8B8vrsf1KF2krgWWBk3nA3Mdfe2wNxwWEREqqlEryQxJbwW3wnunnuoKzWzFsCFwJ3AL8LRA4BY+HoKkAPceKjrEhGRqinRK0lcDCwluB4fZtbZzGYewnr/BIwFiuLGHefuawHC52P3E8sIM1tsZos3bNhwCCGIiEiUJdrEdxvwA+ArAHdfCpTrLrvhjQ7Xu3u5mgjd/QF3T3f39KZNm5ZnESIiUgUk2klil7tvMbP4cQe/kVTpzgb6m9kFQB3gKDP7J5BvZs3cfa2ZNQPWl3P5IiJyBEi0BrXCzK4EUsPbvf8FWFCeFbr7r9y9hbu3Bq4AXnH3q4CZwOCw2GDgmfIsX0SqhhUrVtCjRw969erF1VdfzapVq+jVqxe9e/fmyiuvZPfu3XuVLyoq4pe//CXnnnsul112GQDr1q0jFouRkZHB0KFDAVizZg09e/ZkwIABFBUVsWPHDoYNG1bp2yeHrixXkuhIcJPCacAW4LrDHMtdQB8z+xDoEw6LyBHqlFNOYcGCBbz22msAbNy4kVmzZjF//nzatGnDs88+u1f56dOn0759e+bOncuTTz4JwLRp0xg6dCjz5s0jNTWVZcuWMX36dMaNG0fXrl1ZsmQJDz30EMOHD6/07ZNDd8AEZWZ1zOw6YBzwKdDd3c9y95vdvfBQV+7uOe5+Ufh6k7uf6+5tw+fNh7p8EYmumjVr7nldu3ZtWrZsSaNGjQCoUaMGqampe5WfPXs27733HrFYjAcffBCAdu3asXXrVgC2bt1Ko0aNqFevHoWFhWzbto2UlBSWLl1K9+7dK2ej5LA6WA1qCpAOvAP0A/5Y4RGJSLUxc+ZMTj31VNavX0+TJk0A+OKLL3j55Zc577zz9iqbn5/PKaecwssvv8wjjzxCfn4+6enpTJo0ifbt21OrVi1atWpFVlYWU6dOxczIyckhKyuLMWPGcMcddyRjE+UQHCxBdXD3q9z970Am0LsSYhKRaqJ///6sWLGC5s2bM3v2bLZv387gwYN58MEHqVFj7z5caWlpZGRkUKNGDbp3785HH33E+PHjyc7OZuXKlaSlpTF//nwaNWrE5MmTyc7O5oMPPiA3N5fMzEyKiorIzT3k0zilEh0sQe0sfuHuuyo4FhGpRrZv377n9VFHHUXdunUZMWIEI0eOpEOHDvuU79GjB8uXLwdg+fLltGrVCnencePGADRp0oQtW767MeWECRMYNWrUnqa+lJQUCgoKKnir5HA6WILqZGZfh4+twOnFr83s68oIUESOTM8//zwZGRlkZGSQn59Pw4YN+de//sW9995LLBZjxozgJt6jR48GYNiwYTz66KOcffbZdO3alRYtWjBy5Ehuv/12MjIyWL58OX379gVgy5Yt5OXl0bFjRy699FLGjh3LokWL6Ny5c7I2V8rB3Mt7OlPypaen++LFi5MdRrm0zp6T7BDKZXWdK5MdQvnolu+SqNuq6I0bqvB33Mzecvf0kuMT7WYuIiJSqZSgREQkkpSgREQkkpSgREQkkhK9WKyISJlU3Y5AyY5AiqkGJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikaQEJSIikVTpCcrMWprZq2a20szeNbNrw/GNzewlM/swfD66smMTEZHoSEYNahdwvbu3B7oBPzOzDkA2MNfd2wJzw2ER
EammKj1Buftad387fL0VWAk0BwYAU8JiU4CBlR2biIhER1KPQZlZa+AMYBFwnLuvhSCJAcfuZ54RZrbYzBZv2LCh0mIVEZHKlbQEZWYNgKeA69z960Tnc/cH3D3d3dObNm1acQGKiEhSJSVBmVlNguT0iLv/Kxydb2bNwunNgPXJiE1ERKIhGb34DJgIrHT3u+MmzQQGh68HA89UdmwiIhIdNZKwzrOBnwDvmNnScNyvgbuAJ8xsGPApcFkSYhMRkYio9ATl7q8Dtp/J51ZmLCIiEl26koSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiESSEpSIiERS5BKUmZ1vZrlm9pGZZSc7HhERSY5IJSgzSwXuA/oBHYAsM+uQ3KhERCQZIpWggB8AH7n7J+6+A3gMGJDkmEREJAlqJDuAEpoDn8UN5wFd4wuY2QhgRDhYYGa5lRSbAAbHABuTHUeZ/caSHYFUEfqOJ0Wr0kZGLUGV9g77XgPuDwAPVE44UpKZLXb39GTHIVJR9B2Pjqg18eUBLeOGWwBfJCkWERFJoqglqDeBtmbWxsxqAVcAM5Mck4iIJEGkmvjcfZeZjQJeAFKBSe7+bpLDkr2peVWOdPqOR4S5+8FLiYiIVLKoNfGJiIgASlAiIhJRSlAiIgkysyp9slFVowQlh4V+uHIkM7PTAVwH7SuVEpQcMjNrD9xnZpHqFSpyOJhZX+DZ8HtePE5/yCqBEpQcEjM7BZgILHf3XcmOR+RwMrOLgf8FfuzuK82sIagmVVmUoKTczOwEYD5wn7vfb2Y1zWxQeFV6kSrNzI4C/gJ84O7zzKwFMKW4uU8qnhKUHIqmQC5QJxyeAbRz993JC0nk0JlZS3f/Gvgx8H0z+w3wMDDX3ZcnN7rqQwlKyszMOpjZ1cDbwI3AD83sc2CFu/8uudGJHBozuxB42syOd/c3gBuAnwAF7n5fWEbHWyuBEpSUiZl9H3gEqBu2w78J3Au8R9wtCsxM3y2pcszsfOAm4Nfu/oWZ1XH3fxPUpI4LL8VWfFk2dZSoYNqJSMLCDhHTgT+4+1/Df5Exd18I3AqcYWb/C+DuRUkMVaTMzOwM4HHgd+7+gpmdBMw1s1ZhkhoDXGVmN4I6SlQGJShJiJnVBcYCq939sXD0LKBf+PpNggPKXcL2epEqI6wNHQU8DXzPzDoQXDT2aXdfAxD+Ebse6GtmR6sGVfF0sVhJWNjl9mxgA3AR8Ia73xw3vTbQBfjG3ZcmJUiRMjKzlOIav5ldApwF/Dcwzt3/z8zM3d3MTgVWAbvdvTCJIVcbOtAnB1X8A3b3WWZWRHDA2IDfx5XJAE4D/qZefFKVxCWnq4C2wFygGeBmVt/dt4Wdgq4BLnD3qnc7+CpKCUoOKPz3WGRmJxLUnF4guPPxcGC4mf0VaA9MAG5WcpKqwszOAlLdfWH4ehBwhbvvCM/l6weMMLMCYDAwVMmpcqmJT/aruOYUXurlPmAR0AIYCJwIXAXUBWLAz939ueLmkCSFLJKQsLfeHcAIIB/4O1AP+Km754W9UM8BhgG9gfN089TKp04Ssg8zqwdB00d41vx5wBDgp8ACYCHwATATOAb4hbs/F86j5CSRFv7h+hMwyt3fBr4g6ByxCehtZseEzX6vEHT86abklByqQclewq7kvwB+C3wNvAV8CQxw97Vhmb8TXP5lvJk1cfdNqjlJVWBmDYBJBCfdDjWz44CpwCjgTOBC4DmCK0asT16kAqpBSRwza0dwOZdF7p4XXuplEEHTR2Zc0feBRgDuvil8VnKSSAvPa9pF
cGL5RjO7kyAZzXL3D939cWAOcDmQoZPNk0+dJAQAMzueoJljsrtPCk/CzXT3x8xsCPCYmXUi6OH0Y4ITc0WqBDM7muBE2wLgN0ARcC2wmuB8JwDc/XEz2wX8WyebJ5/+IUixxsAy4DMza0VwEm4nAHdfTFCDOpugg8SP3H2O/mFK1MWdTPsV8DzB6RE3AouBe4BPgdHhlfkBcPen3P2LSg5VSqEdTDVnZk3N7AbgQ+DPQDfgWSDP3X8VlqkZnnibRXCu0wWgyxlJlVB86xcLO/IsI/iTdQOwBHiSoGfq4LAVQSJECUrahY8bCC5XNIWgY8RSM2saltllZqlhkhoGXB02mYhElpkdA3xkZseGPVKPJ2jmW0RwWaNsgu/800ADQFeHiBj14qvmzKwmcDpBF/IvgLuBHwA/Imj+eNLdPw3LFp8XVdvdtycrZpFEhZfn+j1wBUHniH+5+31mFgMuBnYCtxCcsPttsuKU0qmTRDVkZm2Aze6+xd13mtkyoAdBO30dgh90EXA1cIWZ3evu2+Oa9HYkI26Rsgovz7UTWE5wC437wkmvAbWBDOAoXSEimlSDqobM7IcEt804OrwI5tPAJ8CjBP80NwP/R1CT2uzu7yUrVpHDwcz6EJx029Xdt8SNr+fu3yQvMjkQJahqKrzUy18JOkcsdPdbw/HnApcB64Df6PwmOVKYWT+CK0h0d/fNSQ5HEqAEVY2FyegFoGZYkyruknsO8IW7r0xedCKHn5kNIDiHL53g/HLtACNMCaqaM7MLCA4ed1c7vFQHZtbA3QuSHYccnDpJVHPu/qyZ7QbeNbN27v5lsmMSqUhKTlWHalACgJldCGxz95xkxyIiAkpQUoKuSi4iUaEEJSIikaRLHYmISCQpQYmISCQpQYmISCSpm7lIRITd/d8BahLc+XUK8Cfd1kSqKyUokej41t07A5jZscA0IA3dvViqKTXxiUSQu68HRgCjLPALM5sEYGanmdkKM6uX3ChFKpYSlEhEufsnBL/RYwkucnqymV0C/AP4H12FW450auITiTYDCG8UOYTgvkZ/d/c3khqVSCVQDUokoszsRGA3sD4c1RYoAI5PWlAilUgJSiSCzKwpcD8wIbwVShrBVed7A03MLDOpAYpUAl3qSCQiSulm/jBwd9i8NwlY6u5/NrOWwKtAj7AzhcgRSQlKREQiSU18IiISSUpQIiISSUpQIiISSUpQIiISSUpQIiISSUpQIiISSUpQIiISSf8fGIDaiyRcFVwAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Calculate the percentage of 'gender' values stratified by 'dx'\n",
    "percentage_values = (\n",
    "    hf_ce_lab.groupby(['dx', 'gender'])['subject_id'].count() / hf_ce_lab.groupby('dx')['subject_id'].count() * 100\n",
    ")\n",
    "percentage_values = percentage_values.reset_index()\n",
    "\n",
    "# Rename columns for clarity\n",
    "percentage_values.columns = ['dx', 'gender', 'percentage']\n",
    "\n",
    "# Create a bar plot for percentages stratified by 'dx'\n",
    "plt.figure(figsize=(12, 6))\n",
    "ax = percentage_values.pivot(index='dx', columns='gender', values='percentage').plot(kind='bar', stacked=False, rot=45)\n",
    "plt.xlabel('Dx')\n",
    "plt.ylabel('Percentage')\n",
    "plt.title('Percentage of Gender Stratified by Dx')\n",
    "\n",
    "# Add direct labels (values) on top of the bars for both genders\n",
    "for i, bar in enumerate(ax.patches):\n",
    "    height = bar.get_height()\n",
    "    plt.text(\n",
    "        bar.get_x() + bar.get_width() / 2, height + 2, f'{height:.2f}%',\n",
    "        ha='center', va='bottom', fontsize=8, color='black'\n",
    "    )\n",
    "\n",
    "# Set the y-axis limit to 100%\n",
    "plt.ylim(0, 100)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
